diff --git a/.github/workflows/rust-release.yml b/.github/workflows/rust-release.yml index 11c769d95cb..6d1606d2cfc 100644 --- a/.github/workflows/rust-release.yml +++ b/.github/workflows/rust-release.yml @@ -323,6 +323,26 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 + - name: Generate release notes from tag commit message + id: release_notes + shell: bash + run: | + set -euo pipefail + + # On tag pushes, GITHUB_SHA may be a tag object for annotated tags; + # peel it to the underlying commit. + commit="$(git rev-parse "${GITHUB_SHA}^{commit}")" + notes_path="${RUNNER_TEMP}/release-notes.md" + + # Use the commit message for the commit the tag points at (not the + # annotated tag message). + git log -1 --format=%B "${commit}" > "${notes_path}" + # Ensure trailing newline so GitHub's markdown renderer doesn't + # occasionally run the last line into subsequent content. + echo >> "${notes_path}" + + echo "path=${notes_path}" >> "${GITHUB_OUTPUT}" + - uses: actions/download-artifact@v7 with: path: dist @@ -395,6 +415,7 @@ jobs: with: name: ${{ steps.release_name.outputs.name }} tag_name: ${{ github.ref_name }} + body_path: ${{ steps.release_notes.outputs.path }} files: dist/** # Mark as prerelease only when the version has a suffix after x.y.z # (e.g. -alpha, -beta). Otherwise publish a normal release. diff --git a/AGENTS.md b/AGENTS.md index e924ceeae1b..5c0a6db6374 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,11 +77,11 @@ If you don’t have the tool: - Prefer deep equals comparisons whenever possible. Perform `assert_eq!()` on entire objects, rather than individual fields. - Avoid mutating process environment in tests; prefer passing environment-derived flags or dependencies from above. 
-### Spawning workspace binaries in tests (Cargo vs Buck2) +### Spawning workspace binaries in tests (Cargo vs Bazel) - Prefer `codex_utils_cargo_bin::cargo_bin("...")` over `assert_cmd::Command::cargo_bin(...)` or `escargot` when tests need to spawn first-party binaries. - - Under Buck2, `CARGO_BIN_EXE_*` may be project-relative (e.g. `buck-out/...`), which breaks if a test changes its working directory. `codex_utils_cargo_bin::cargo_bin` resolves to an absolute path first. -- When locating fixture files under Buck2, avoid `env!("CARGO_MANIFEST_DIR")` (Buck codegen sets it to `"."`). Prefer deriving paths from `codex_utils_cargo_bin::buck_project_root()` when needed. + - Under Bazel, binaries and resources may live under runfiles; use `codex_utils_cargo_bin::cargo_bin` to resolve absolute paths that remain stable after `chdir`. +- When locating fixture files or test resources under Bazel, avoid `env!("CARGO_MANIFEST_DIR")`. Prefer `codex_utils_cargo_bin::find_resource!` so paths resolve correctly under both Cargo and Bazel runfiles. ### Integration tests (core) diff --git a/announcement_tip.toml b/announcement_tip.toml new file mode 100644 index 00000000000..3ad4a765904 --- /dev/null +++ b/announcement_tip.toml @@ -0,0 +1,16 @@ +# Example announcement tips for Codex TUI. +# Each [[announcements]] entry is evaluated in order; the last matching one is shown. +# Dates are UTC, formatted as YYYY-MM-DD. The from_date is inclusive and the to_date is exclusive. +# version_regex matches against the CLI version (env!("CARGO_PKG_VERSION")); omit to apply to all versions. +# target_app specify which app should display the announcement (cli, vsce, ...). + +[[announcements]] +content = "Welcome to Codex! Check out the new onboarding flow." 
+from_date = "2024-10-01" +to_date = "2024-10-15" +target_app = "cli" + +[[announcements]] +content = "This is a test announcement" +version_regex = "^0\\.0\\.0$" +to_date = "2026-01-10" diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 46f1e4ebd56..92ecc7e3b0b 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -360,7 +360,7 @@ dependencies = [ "objc2-foundation", "parking_lot", "percent-encoding", - "windows-sys 0.52.0", + "windows-sys 0.60.2", "wl-clipboard-rs", "x11rb", ] @@ -819,6 +819,8 @@ version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -1127,6 +1129,7 @@ dependencies = [ "codex-common", "codex-core", "codex-git", + "codex-utils-cargo-bin", "serde", "serde_json", "tempfile", @@ -1197,6 +1200,7 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", + "zstd", ] [[package]] @@ -1348,6 +1352,7 @@ dependencies = [ "which", "wildmatch", "wiremock", + "zstd", ] [[package]] @@ -1605,10 +1610,12 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry-semantic-conventions", "opentelemetry_sdk", + "pretty_assertions", "reqwest", "serde", "serde_json", "strum_macros 0.27.2", + "thiserror 2.0.17", "tokio", "tracing", "tracing-opentelemetry", @@ -1820,6 +1827,7 @@ dependencies = [ "pulldown-cmark", "rand 0.9.2", "ratatui", + "ratatui-core", "ratatui-macros", "regex-lite", "reqwest", @@ -1841,6 +1849,7 @@ dependencies = [ "tracing-subscriber", "tree-sitter-bash", "tree-sitter-highlight", + "tui-scrollbar", "unicode-segmentation", "unicode-width 0.2.1", "url", @@ -1864,7 +1873,7 @@ dependencies = [ name = "codex-utils-cache" version = "0.0.0" dependencies = [ - "lru 0.16.2", + "lru 0.16.3", "sha1", "tokio", ] @@ -1874,6 +1883,7 @@ name = "codex-utils-cargo-bin" version = "0.0.0" dependencies = [ "assert_cmd", + "path-absolutize", "thiserror 2.0.17", ] @@ -2005,6 
+2015,20 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -2583,6 +2607,15 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "dotenvy" version = "0.15.7" @@ -2756,7 +2789,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -2864,7 +2897,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.0.8", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3805,7 +3838,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3899,6 +3932,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = 
"js-sys" version = "0.3.77" @@ -3909,6 +3952,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kasuari" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fe90c1150662e858c7d5f945089b7517b0a80d8bf7ba4b1b5ffc984e7230a5b" +dependencies = [ + "hashbrown 0.16.0", + "thiserror 2.0.17", +] + [[package]] name = "keyring" version = "3.6.3" @@ -4054,6 +4107,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "local-waker" version = "0.1.4" @@ -4110,9 +4169,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f" +checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" dependencies = [ "hashbrown 0.16.0", ] @@ -5290,7 +5349,7 @@ dependencies = [ "once_cell", "socket2 0.6.1", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -5393,7 +5452,7 @@ source = "git+https://github.com/nornagon/ratatui?branch=nornagon-v0.29.0-patch# dependencies = [ "bitflags 2.10.0", "cassowary", - "compact_str", + "compact_str 0.8.1", "crossterm", "indoc", "instability", @@ -5402,7 +5461,27 @@ dependencies = [ "paste", "strum 0.26.3", "unicode-segmentation", - "unicode-truncate", + "unicode-truncate 1.1.0", + "unicode-width 0.2.1", +] + +[[package]] +name = "ratatui-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293" +dependencies = [ + "bitflags 2.10.0", + "compact_str 0.9.0", + 
"hashbrown 0.16.0", + "indoc", + "itertools 0.14.0", + "kasuari", + "lru 0.16.3", + "strum 0.27.2", + "thiserror 2.0.17", + "unicode-segmentation", + "unicode-truncate 2.0.0", "unicode-width 0.2.1", ] @@ -5649,7 +5728,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5662,7 +5741,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -6569,6 +6648,9 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] [[package]] name = "strum_macros" @@ -7415,6 +7497,16 @@ dependencies = [ "termcolor", ] +[[package]] +name = "tui-scrollbar" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42613099915b2e30e9f144670666e858e2538366f77742e1cf1c2f230efcacd" +dependencies = [ + "document-features", + "ratatui-core", +] + [[package]] name = "typenum" version = "1.18.0" @@ -7482,6 +7574,17 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "unicode-truncate" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fbf03860ff438702f3910ca5f28f8dac63c1c11e7efb5012b8b175493606330" +dependencies = [ + "itertools 0.13.0", + "unicode-segmentation", + "unicode-width 0.2.1", +] + [[package]] name = "unicode-width" version = "0.1.14" @@ -7926,7 +8029,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -8724,6 +8827,34 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.4.12" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 5b017a3e36b..1dc043a4fcc 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -152,7 +152,7 @@ landlock = "0.4.4" lazy_static = "1" libc = "0.2.177" log = "0.4" -lru = "0.16.2" +lru = "0.16.3" maplit = "1.0.2" mime_guess = "2.0.5" multimap = "0.10.0" @@ -176,6 +176,7 @@ pretty_assertions = "1.4.1" pulldown-cmark = "0.10" rand = "0.9" ratatui = "0.29.0" +ratatui-core = "0.1.0" ratatui-macros = "0.6.0" regex = "1.12.2" regex-lite = "0.1.8" @@ -217,8 +218,10 @@ tracing-subscriber = "0.3.22" tracing-test = "0.2.5" tree-sitter = "0.25.10" tree-sitter-bash = "0.25" +zstd = "0.13" tree-sitter-highlight = "0.25.10" ts-rs = "11" +tui-scrollbar = "0.2.1" uds_windows = "1.1.0" unicode-segmentation = "1.12.0" unicode-width = "0.2" diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index 83fa53b9973..a841e29205d 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -113,6 +113,10 @@ client_request_definitions! 
{ params: v2::ThreadArchiveParams, response: v2::ThreadArchiveResponse, }, + ThreadRollback => "thread/rollback" { + params: v2::ThreadRollbackParams, + response: v2::ThreadRollbackResponse, + }, ThreadList => "thread/list" { params: v2::ThreadListParams, response: v2::ThreadListResponse, @@ -193,6 +197,11 @@ client_request_definitions! { response: v2::ConfigWriteResponse, }, + ConfigRequirementsRead => "configRequirements/read" { + params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>, + response: v2::ConfigRequirementsReadResponse, + }, + GetAccount => "account/read" { params: v2::GetAccountParams, response: v2::GetAccountResponse, @@ -565,7 +574,7 @@ client_notification_definitions! { mod tests { use super::*; use anyhow::Result; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::protocol::AskForApproval; @@ -614,7 +623,7 @@ mod tests { #[test] fn conversation_id_serializes_as_plain_string() -> Result<()> { - let id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; + let id = ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; assert_eq!( json!("67e55044-10b1-426f-9247-bb680e5fe0c8"), @@ -625,11 +634,10 @@ mod tests { #[test] fn conversation_id_deserializes_from_plain_string() -> Result<()> { - let id: ConversationId = - serde_json::from_value(json!("67e55044-10b1-426f-9247-bb680e5fe0c8"))?; + let id: ThreadId = serde_json::from_value(json!("67e55044-10b1-426f-9247-bb680e5fe0c8"))?; assert_eq!( - ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?, + ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?, id, ); Ok(()) @@ -650,7 +658,7 @@ mod tests { #[test] fn serialize_server_request() -> Result<()> { - let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; + let conversation_id = 
ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; let params = v1::ExecCommandApprovalParams { conversation_id, call_id: "call-42".to_string(), @@ -708,6 +716,22 @@ mod tests { Ok(()) } + #[test] + fn serialize_config_requirements_read() -> Result<()> { + let request = ClientRequest::ConfigRequirementsRead { + request_id: RequestId::Integer(1), + params: None, + }; + assert_eq!( + json!({ + "method": "configRequirements/read", + "id": 1, + }), + serde_json::to_value(&request)?, + ); + Ok(()) + } + #[test] fn serialize_account_login_api_key() -> Result<()> { let request = ClientRequest::LoginAccount { diff --git a/codex-rs/app-server-protocol/src/protocol/thread_history.rs b/codex-rs/app-server-protocol/src/protocol/thread_history.rs index ba1e6261cc6..6fa6dfabbd4 100644 --- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs +++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs @@ -6,6 +6,7 @@ use crate::protocol::v2::UserInput; use codex_protocol::protocol::AgentReasoningEvent; use codex_protocol::protocol::AgentReasoningRawContentEvent; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::ThreadRolledBackEvent; use codex_protocol::protocol::TurnAbortedEvent; use codex_protocol::protocol::UserMessageEvent; @@ -57,6 +58,7 @@ impl ThreadHistoryBuilder { EventMsg::TokenCount(_) => {} EventMsg::EnteredReviewMode(_) => {} EventMsg::ExitedReviewMode(_) => {} + EventMsg::ThreadRolledBack(payload) => self.handle_thread_rollback(payload), EventMsg::UndoCompleted(_) => {} EventMsg::TurnAborted(payload) => self.handle_turn_aborted(payload), _ => {} @@ -130,6 +132,23 @@ impl ThreadHistoryBuilder { turn.status = TurnStatus::Interrupted; } + fn handle_thread_rollback(&mut self, payload: &ThreadRolledBackEvent) { + self.finish_current_turn(); + + let n = usize::try_from(payload.num_turns).unwrap_or(usize::MAX); + if n >= self.turns.len() { + self.turns.clear(); + } else { + 
self.turns.truncate(self.turns.len().saturating_sub(n)); + } + + // Re-number subsequent synthetic ids so the pruned history is consistent. + self.next_turn_index = + i64::try_from(self.turns.len().saturating_add(1)).unwrap_or(i64::MAX); + let item_count: usize = self.turns.iter().map(|t| t.items.len()).sum(); + self.next_item_index = i64::try_from(item_count.saturating_add(1)).unwrap_or(i64::MAX); + } + fn finish_current_turn(&mut self) { if let Some(turn) = self.current_turn.take() { if turn.items.is_empty() { @@ -213,6 +232,7 @@ mod tests { use codex_protocol::protocol::AgentMessageEvent; use codex_protocol::protocol::AgentReasoningEvent; use codex_protocol::protocol::AgentReasoningRawContentEvent; + use codex_protocol::protocol::ThreadRolledBackEvent; use codex_protocol::protocol::TurnAbortReason; use codex_protocol::protocol::TurnAbortedEvent; use codex_protocol::protocol::UserMessageEvent; @@ -410,4 +430,95 @@ mod tests { } ); } + + #[test] + fn drops_last_turns_on_thread_rollback() { + let events = vec![ + EventMsg::UserMessage(UserMessageEvent { + message: "First".into(), + images: None, + }), + EventMsg::AgentMessage(AgentMessageEvent { + message: "A1".into(), + }), + EventMsg::UserMessage(UserMessageEvent { + message: "Second".into(), + images: None, + }), + EventMsg::AgentMessage(AgentMessageEvent { + message: "A2".into(), + }), + EventMsg::ThreadRolledBack(ThreadRolledBackEvent { num_turns: 1 }), + EventMsg::UserMessage(UserMessageEvent { + message: "Third".into(), + images: None, + }), + EventMsg::AgentMessage(AgentMessageEvent { + message: "A3".into(), + }), + ]; + + let turns = build_turns_from_event_msgs(&events); + let expected = vec![ + Turn { + id: "turn-1".into(), + status: TurnStatus::Completed, + error: None, + items: vec![ + ThreadItem::UserMessage { + id: "item-1".into(), + content: vec![UserInput::Text { + text: "First".into(), + }], + }, + ThreadItem::AgentMessage { + id: "item-2".into(), + text: "A1".into(), + }, + ], + }, + Turn { + id: 
"turn-2".into(), + status: TurnStatus::Completed, + error: None, + items: vec![ + ThreadItem::UserMessage { + id: "item-3".into(), + content: vec![UserInput::Text { + text: "Third".into(), + }], + }, + ThreadItem::AgentMessage { + id: "item-4".into(), + text: "A3".into(), + }, + ], + }, + ]; + assert_eq!(turns, expected); + } + + #[test] + fn thread_rollback_clears_all_turns_when_num_turns_exceeds_history() { + let events = vec![ + EventMsg::UserMessage(UserMessageEvent { + message: "One".into(), + images: None, + }), + EventMsg::AgentMessage(AgentMessageEvent { + message: "A1".into(), + }), + EventMsg::UserMessage(UserMessageEvent { + message: "Two".into(), + images: None, + }), + EventMsg::AgentMessage(AgentMessageEvent { + message: "A2".into(), + }), + EventMsg::ThreadRolledBack(ThreadRolledBackEvent { num_turns: 99 }), + ]; + + let turns = build_turns_from_event_msgs(&events); + assert_eq!(turns, Vec::<Turn>::new()); + } } diff --git a/codex-rs/app-server-protocol/src/protocol/v1.rs b/codex-rs/app-server-protocol/src/protocol/v1.rs index df39f8809e0..981ab28d1b4 100644 --- a/codex-rs/app-server-protocol/src/protocol/v1.rs +++ b/codex-rs/app-server-protocol/src/protocol/v1.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::path::PathBuf; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::SandboxMode; @@ -68,7 +68,7 @@ pub struct NewConversationParams { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct NewConversationResponse { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub model: String, pub reasoning_effort: Option, pub rollout_path: PathBuf, @@ -77,7 +77,7 @@ pub struct NewConversationResponse { #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct
ResumeConversationResponse { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub model: String, pub initial_messages: Option>, pub rollout_path: PathBuf, @@ -90,9 +90,9 @@ pub enum GetConversationSummaryParams { #[serde(rename = "rolloutPath")] rollout_path: PathBuf, }, - ConversationId { + ThreadId { #[serde(rename = "conversationId")] - conversation_id: ConversationId, + conversation_id: ThreadId, }, } @@ -113,7 +113,7 @@ pub struct ListConversationsParams { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct ConversationSummary { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub path: PathBuf, pub preview: String, pub timestamp: Option, @@ -143,7 +143,7 @@ pub struct ListConversationsResponse { #[serde(rename_all = "camelCase")] pub struct ResumeConversationParams { pub path: Option, - pub conversation_id: Option, + pub conversation_id: Option, pub history: Option>, pub overrides: Option, } @@ -158,7 +158,7 @@ pub struct AddConversationSubscriptionResponse { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct ArchiveConversationParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub rollout_path: PathBuf, } @@ -198,7 +198,7 @@ pub struct GitDiffToRemoteResponse { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct ApplyPatchApprovalParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, /// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent] /// and [codex_core::protocol::PatchApplyEndEvent]. 
pub call_id: String, @@ -219,7 +219,7 @@ pub struct ApplyPatchApprovalResponse { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct ExecCommandApprovalParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, /// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent] /// and [codex_core::protocol::ExecCommandEndEvent]. pub call_id: String, @@ -369,14 +369,14 @@ pub struct SandboxSettings { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct SendUserMessageParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub items: Vec, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct SendUserTurnParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub items: Vec, pub cwd: PathBuf, pub approval_policy: AskForApproval, @@ -384,6 +384,8 @@ pub struct SendUserTurnParams { pub model: String, pub effort: Option, pub summary: ReasoningSummary, + /// Optional JSON Schema used to constrain the final assistant message for this turn. 
+ pub output_schema: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -393,7 +395,7 @@ pub struct SendUserTurnResponse {} #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct InterruptConversationParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, } #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] @@ -409,7 +411,7 @@ pub struct SendUserMessageResponse {} #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct AddConversationListenerParams { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, #[serde(default)] pub experimental_raw_events: bool, } @@ -443,7 +445,7 @@ pub struct LoginChatGptCompleteNotification { #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct SessionConfiguredNotification { - pub session_id: ConversationId, + pub session_id: ThreadId, pub model: String, pub reasoning_effort: Option, pub history_log_id: u64, diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 7f09216eab3..2d8a9113f1f 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -89,6 +89,7 @@ pub enum CodexErrorInfo { InternalServerError, Unauthorized, BadRequest, + ThreadRollbackFailed, SandboxError, /// The response SSE stream disconnected in the middle of a turn before completion. 
ResponseStreamDisconnected { @@ -119,6 +120,7 @@ impl From for CodexErrorInfo { CoreCodexErrorInfo::InternalServerError => CodexErrorInfo::InternalServerError, CoreCodexErrorInfo::Unauthorized => CodexErrorInfo::Unauthorized, CoreCodexErrorInfo::BadRequest => CodexErrorInfo::BadRequest, + CoreCodexErrorInfo::ThreadRollbackFailed => CodexErrorInfo::ThreadRollbackFailed, CoreCodexErrorInfo::SandboxError => CodexErrorInfo::SandboxError, CoreCodexErrorInfo::ResponseStreamDisconnected { http_status_code } => { CodexErrorInfo::ResponseStreamDisconnected { http_status_code } @@ -330,6 +332,15 @@ pub struct ProfileV2 { pub additional: HashMap, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export_to = "v2/")] +pub struct AnalyticsConfig { + pub enabled: Option, + #[serde(default, flatten)] + pub additional: HashMap, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] #[ts(export_to = "v2/")] @@ -354,6 +365,7 @@ pub struct Config { pub model_reasoning_effort: Option, pub model_reasoning_summary: Option, pub model_verbosity: Option, + pub analytics: Option, #[serde(default, flatten)] pub additional: HashMap, } @@ -441,6 +453,22 @@ pub struct ConfigReadResponse { pub layers: Option>, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ConfigRequirements { + pub allowed_approval_policies: Option>, + pub allowed_sandbox_modes: Option>, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ConfigRequirementsReadResponse { + /// Null if no requirements are configured (e.g. no requirements.toml/MDM entries). 
+ pub requirements: Option, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -475,14 +503,33 @@ pub struct ConfigEdit { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] -pub enum ApprovalDecision { +pub enum CommandExecutionApprovalDecision { + /// User approved the command. Accept, - /// Approve and remember the approval for the session. + /// User approved the command and future identical commands should run without prompting. AcceptForSession, + /// User approved the command, and wants to apply the proposed execpolicy amendment so future + /// matching commands can run without prompting. AcceptWithExecpolicyAmendment { execpolicy_amendment: ExecPolicyAmendment, }, + /// User denied the command. The agent will continue the turn. Decline, + /// User denied the command. The turn will also be immediately interrupted. + Cancel, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub enum FileChangeApprovalDecision { + /// User approved the file changes. + Accept, + /// User approved the file changes and future changes to the same files should run without prompting. + AcceptForSession, + /// User denied the file changes. The agent will continue the turn. + Decline, + /// User denied the file changes. The turn will also be immediately interrupted. Cancel, } @@ -1045,6 +1092,30 @@ pub struct ThreadArchiveParams { #[ts(export_to = "v2/")] pub struct ThreadArchiveResponse {} +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ThreadRollbackParams { + pub thread_id: String, + /// The number of turns to drop from the end of the thread. Must be >= 1. 
+ /// + /// This only modifies the thread's history and does not revert local file changes + /// that have been made by the agent. Clients are responsible for reverting these changes. + pub num_turns: u32, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ThreadRollbackResponse { + /// The updated thread after applying the rollback, with `turns` populated. + /// + /// The ThreadItems stored in each Turn are lossy since we explicitly do not + /// persist all agent interactions, such as command executions. This is the same + /// behavior as `thread/resume`. + pub thread: Thread, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -1183,7 +1254,7 @@ pub struct Thread { pub source: SessionSource, /// Optional Git metadata captured when the thread was created. pub git_info: Option, - /// Only populated on a `thread/resume` response. + /// Only populated on `thread/resume` and `thread/rollback` responses. /// For all other responses and notifications returning a Thread, /// the turns field will be an empty list. pub turns: Vec, @@ -1211,6 +1282,7 @@ pub struct ThreadTokenUsageUpdatedNotification { pub struct ThreadTokenUsage { pub total: TokenUsageBreakdown, pub last: TokenUsageBreakdown, + // TODO(aibrahim): make this not optional #[ts(type = "number | null")] pub model_context_window: Option, } @@ -1319,6 +1391,8 @@ pub struct TurnStartParams { pub effort: Option, /// Override the reasoning summary for this turn and subsequent turns. pub summary: Option, + /// Optional JSON Schema used to constrain the final assistant message for this turn. 
+ pub output_schema: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1402,6 +1476,7 @@ pub enum UserInput { Text { text: String }, Image { url: String }, LocalImage { path: PathBuf }, + Skill { name: String, path: PathBuf }, } impl UserInput { @@ -1410,6 +1485,7 @@ impl UserInput { UserInput::Text { text } => CoreUserInput::Text { text }, UserInput::Image { url } => CoreUserInput::Image { image_url: url }, UserInput::LocalImage { path } => CoreUserInput::LocalImage { path }, + UserInput::Skill { name, path } => CoreUserInput::Skill { name, path }, } } } @@ -1420,6 +1496,7 @@ impl From for UserInput { CoreUserInput::Text { text } => UserInput::Text { text }, CoreUserInput::Image { image_url } => UserInput::Image { url: image_url }, CoreUserInput::LocalImage { path } => UserInput::LocalImage { path }, + CoreUserInput::Skill { name, path } => UserInput::Skill { name, path }, _ => unreachable!("unsupported user input variant"), } } @@ -1846,7 +1923,7 @@ pub struct CommandExecutionRequestApprovalParams { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct CommandExecutionRequestApprovalResponse { - pub decision: ApprovalDecision, + pub decision: CommandExecutionApprovalDecision, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1866,7 +1943,7 @@ pub struct FileChangeRequestApprovalParams { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[ts(export_to = "v2/")] pub struct FileChangeRequestApprovalResponse { - pub decision: ApprovalDecision, + pub decision: FileChangeApprovalDecision, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -2005,6 +2082,10 @@ mod tests { CoreUserInput::LocalImage { path: PathBuf::from("local/image.png"), }, + CoreUserInput::Skill { + name: "skill-creator".to_string(), + path: PathBuf::from("/repo/.codex/skills/skill-creator/SKILL.md"), + }, ], }); @@ -2022,6 +2103,10 @@ mod tests { 
UserInput::LocalImage { path: PathBuf::from("local/image.png"), }, + UserInput::Skill { + name: "skill-creator".to_string(), + path: PathBuf::from("/repo/.codex/skills/skill-creator/SKILL.md"), + }, ], } ); diff --git a/codex-rs/app-server-test-client/src/main.rs b/codex-rs/app-server-test-client/src/main.rs index b66c59d55a7..64577a5539b 100644 --- a/codex-rs/app-server-test-client/src/main.rs +++ b/codex-rs/app-server-test-client/src/main.rs @@ -13,16 +13,18 @@ use std::time::Duration; use anyhow::Context; use anyhow::Result; use anyhow::bail; +use clap::ArgAction; use clap::Parser; use clap::Subcommand; use codex_app_server_protocol::AddConversationListenerParams; use codex_app_server_protocol::AddConversationSubscriptionResponse; -use codex_app_server_protocol::ApprovalDecision; use codex_app_server_protocol::AskForApproval; use codex_app_server_protocol::ClientInfo; use codex_app_server_protocol::ClientRequest; +use codex_app_server_protocol::CommandExecutionApprovalDecision; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; +use codex_app_server_protocol::FileChangeApprovalDecision; use codex_app_server_protocol::FileChangeRequestApprovalParams; use codex_app_server_protocol::FileChangeRequestApprovalResponse; use codex_app_server_protocol::GetAccountRateLimitsResponse; @@ -35,6 +37,8 @@ use codex_app_server_protocol::JSONRPCRequest; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::LoginChatGptCompleteNotification; use codex_app_server_protocol::LoginChatGptResponse; +use codex_app_server_protocol::ModelListParams; +use codex_app_server_protocol::ModelListResponse; use codex_app_server_protocol::NewConversationParams; use codex_app_server_protocol::NewConversationResponse; use codex_app_server_protocol::RequestId; @@ -49,7 +53,7 @@ use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use 
codex_app_server_protocol::TurnStatus; use codex_app_server_protocol::UserInput as V2UserInput; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; use serde::Serialize; @@ -65,6 +69,19 @@ struct Cli { #[arg(long, env = "CODEX_BIN", default_value = "codex")] codex_bin: String, + /// Forwarded to the `codex` CLI as `--config key=value`. Repeatable. + /// + /// Example: + /// `--config 'model_providers.mock.base_url="http://localhost:4010/v2"'` + #[arg( + short = 'c', + long = "config", + value_name = "key=value", + action = ArgAction::Append, + global = true + )] + config_overrides: Vec, + #[command(subcommand)] command: CliCommand, } @@ -113,37 +130,54 @@ enum CliCommand { TestLogin, /// Fetch the current account rate limits from the Codex app-server. GetAccountRateLimits, + /// List the available models from the Codex app-server. + #[command(name = "model-list")] + ModelList, } fn main() -> Result<()> { - let Cli { codex_bin, command } = Cli::parse(); + let Cli { + codex_bin, + config_overrides, + command, + } = Cli::parse(); match command { - CliCommand::SendMessage { user_message } => send_message(codex_bin, user_message), - CliCommand::SendMessageV2 { user_message } => send_message_v2(codex_bin, user_message), + CliCommand::SendMessage { user_message } => { + send_message(&codex_bin, &config_overrides, user_message) + } + CliCommand::SendMessageV2 { user_message } => { + send_message_v2(&codex_bin, &config_overrides, user_message) + } CliCommand::TriggerCmdApproval { user_message } => { - trigger_cmd_approval(codex_bin, user_message) + trigger_cmd_approval(&codex_bin, &config_overrides, user_message) } CliCommand::TriggerPatchApproval { user_message } => { - trigger_patch_approval(codex_bin, user_message) + trigger_patch_approval(&codex_bin, &config_overrides, user_message) } - CliCommand::NoTriggerCmdApproval => no_trigger_cmd_approval(codex_bin), + 
CliCommand::NoTriggerCmdApproval => no_trigger_cmd_approval(&codex_bin, &config_overrides), CliCommand::SendFollowUpV2 { first_message, follow_up_message, - } => send_follow_up_v2(codex_bin, first_message, follow_up_message), - CliCommand::TestLogin => test_login(codex_bin), - CliCommand::GetAccountRateLimits => get_account_rate_limits(codex_bin), + } => send_follow_up_v2( + &codex_bin, + &config_overrides, + first_message, + follow_up_message, + ), + CliCommand::TestLogin => test_login(&codex_bin, &config_overrides), + CliCommand::GetAccountRateLimits => get_account_rate_limits(&codex_bin, &config_overrides), + CliCommand::ModelList => model_list(&codex_bin, &config_overrides), } } -fn send_message(codex_bin: String, user_message: String) -> Result<()> { - let mut client = CodexClient::spawn(codex_bin)?; +fn send_message(codex_bin: &str, config_overrides: &[String], user_message: String) -> Result<()> { + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); - let conversation = client.new_conversation()?; + let conversation = client.start_thread()?; println!("< newConversation response: {conversation:?}"); let subscription = client.add_conversation_listener(&conversation.conversation_id)?; @@ -154,51 +188,66 @@ fn send_message(codex_bin: String, user_message: String) -> Result<()> { client.stream_conversation(&conversation.conversation_id)?; - client.remove_conversation_listener(subscription.subscription_id)?; + client.remove_thread_listener(subscription.subscription_id)?; Ok(()) } -fn send_message_v2(codex_bin: String, user_message: String) -> Result<()> { - send_message_v2_with_policies(codex_bin, user_message, None, None) +fn send_message_v2( + codex_bin: &str, + config_overrides: &[String], + user_message: String, +) -> Result<()> { + send_message_v2_with_policies(codex_bin, config_overrides, user_message, None, None) } -fn trigger_cmd_approval(codex_bin: 
String, user_message: Option) -> Result<()> { +fn trigger_cmd_approval( + codex_bin: &str, + config_overrides: &[String], + user_message: Option, +) -> Result<()> { let default_prompt = "Run `touch /tmp/should-trigger-approval` so I can confirm the file exists."; let message = user_message.unwrap_or_else(|| default_prompt.to_string()); send_message_v2_with_policies( codex_bin, + config_overrides, message, Some(AskForApproval::OnRequest), Some(SandboxPolicy::ReadOnly), ) } -fn trigger_patch_approval(codex_bin: String, user_message: Option) -> Result<()> { +fn trigger_patch_approval( + codex_bin: &str, + config_overrides: &[String], + user_message: Option, +) -> Result<()> { let default_prompt = "Create a file named APPROVAL_DEMO.txt containing a short hello message using apply_patch."; let message = user_message.unwrap_or_else(|| default_prompt.to_string()); send_message_v2_with_policies( codex_bin, + config_overrides, message, Some(AskForApproval::OnRequest), Some(SandboxPolicy::ReadOnly), ) } -fn no_trigger_cmd_approval(codex_bin: String) -> Result<()> { +fn no_trigger_cmd_approval(codex_bin: &str, config_overrides: &[String]) -> Result<()> { let prompt = "Run `touch should_not_trigger_approval.txt`"; - send_message_v2_with_policies(codex_bin, prompt.to_string(), None, None) + send_message_v2_with_policies(codex_bin, config_overrides, prompt.to_string(), None, None) } fn send_message_v2_with_policies( - codex_bin: String, + codex_bin: &str, + config_overrides: &[String], user_message: String, approval_policy: Option, sandbox_policy: Option, ) -> Result<()> { - let mut client = CodexClient::spawn(codex_bin)?; + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); @@ -222,11 +271,12 @@ fn send_message_v2_with_policies( } fn send_follow_up_v2( - codex_bin: String, + codex_bin: &str, + config_overrides: &[String], first_message: String, follow_up_message: String, ) 
-> Result<()> { - let mut client = CodexClient::spawn(codex_bin)?; + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); @@ -259,8 +309,8 @@ fn send_follow_up_v2( Ok(()) } -fn test_login(codex_bin: String) -> Result<()> { - let mut client = CodexClient::spawn(codex_bin)?; +fn test_login(codex_bin: &str, config_overrides: &[String]) -> Result<()> { + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); @@ -289,8 +339,8 @@ fn test_login(codex_bin: String) -> Result<()> { } } -fn get_account_rate_limits(codex_bin: String) -> Result<()> { - let mut client = CodexClient::spawn(codex_bin)?; +fn get_account_rate_limits(codex_bin: &str, config_overrides: &[String]) -> Result<()> { + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); @@ -301,6 +351,18 @@ fn get_account_rate_limits(codex_bin: String) -> Result<()> { Ok(()) } +fn model_list(codex_bin: &str, config_overrides: &[String]) -> Result<()> { + let mut client = CodexClient::spawn(codex_bin, config_overrides)?; + + let initialize = client.initialize()?; + println!("< initialize response: {initialize:?}"); + + let response = client.model_list(ModelListParams::default())?; + println!("< model/list response: {response:?}"); + + Ok(()) +} + struct CodexClient { child: Child, stdin: Option, @@ -309,8 +371,12 @@ struct CodexClient { } impl CodexClient { - fn spawn(codex_bin: String) -> Result { - let mut codex_app_server = Command::new(&codex_bin) + fn spawn(codex_bin: &str, config_overrides: &[String]) -> Result { + let mut cmd = Command::new(codex_bin); + for override_kv in config_overrides { + cmd.arg("--config").arg(override_kv); + } + let mut codex_app_server = cmd .arg("app-server") .stdin(Stdio::piped()) 
.stdout(Stdio::piped()) @@ -351,7 +417,7 @@ impl CodexClient { self.send_request(request, request_id, "initialize") } - fn new_conversation(&mut self) -> Result { + fn start_thread(&mut self) -> Result { let request_id = self.request_id(); let request = ClientRequest::NewConversation { request_id: request_id.clone(), @@ -363,7 +429,7 @@ impl CodexClient { fn add_conversation_listener( &mut self, - conversation_id: &ConversationId, + conversation_id: &ThreadId, ) -> Result { let request_id = self.request_id(); let request = ClientRequest::AddConversationListener { @@ -377,7 +443,7 @@ impl CodexClient { self.send_request(request, request_id, "addConversationListener") } - fn remove_conversation_listener(&mut self, subscription_id: Uuid) -> Result<()> { + fn remove_thread_listener(&mut self, subscription_id: Uuid) -> Result<()> { let request_id = self.request_id(); let request = ClientRequest::RemoveConversationListener { request_id: request_id.clone(), @@ -395,7 +461,7 @@ impl CodexClient { fn send_user_message( &mut self, - conversation_id: &ConversationId, + conversation_id: &ThreadId, message: &str, ) -> Result { let request_id = self.request_id(); @@ -452,7 +518,17 @@ impl CodexClient { self.send_request(request, request_id, "account/rateLimits/read") } - fn stream_conversation(&mut self, conversation_id: &ConversationId) -> Result<()> { + fn model_list(&mut self, params: ModelListParams) -> Result { + let request_id = self.request_id(); + let request = ClientRequest::ModelList { + request_id: request_id.clone(), + params, + }; + + self.send_request(request, request_id, "model/list") + } + + fn stream_conversation(&mut self, conversation_id: &ThreadId) -> Result<()> { loop { let notification = self.next_notification()?; @@ -589,7 +665,7 @@ impl CodexClient { fn extract_event( &self, notification: JSONRPCNotification, - conversation_id: &ConversationId, + conversation_id: &ThreadId, ) -> Result> { let params = notification .params @@ -603,7 +679,7 @@ impl 
CodexClient { let conversation_value = map .remove("conversationId") .context("event missing conversationId")?; - let notification_conversation: ConversationId = serde_json::from_value(conversation_value) + let notification_conversation: ThreadId = serde_json::from_value(conversation_value) .context("conversationId was not a valid UUID")?; if ¬ification_conversation != conversation_id { @@ -770,7 +846,7 @@ impl CodexClient { } let response = CommandExecutionRequestApprovalResponse { - decision: ApprovalDecision::Accept, + decision: CommandExecutionApprovalDecision::Accept, }; self.send_server_request_response(request_id, &response)?; println!("< approved commandExecution request for item {item_id}"); @@ -801,7 +877,7 @@ impl CodexClient { } let response = FileChangeRequestApprovalResponse { - decision: ApprovalDecision::Accept, + decision: FileChangeApprovalDecision::Accept, }; self.send_server_request_response(request_id, &response)?; println!("< approved fileChange request for item {item_id}"); diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 787ec398d3f..0d8fafad25b 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -11,6 +11,8 @@ - [Initialization](#initialization) - [API Overview](#api-overview) - [Events](#events) +- [Approvals](#approvals) +- [Skills](#skills) - [Auth endpoints](#auth-endpoints) ## Protocol @@ -72,6 +74,7 @@ Example (from OpenAI's official VSCode extension): - `thread/resume` — reopen an existing thread by id so subsequent `turn/start` calls append to it. - `thread/list` — page through stored rollouts; supports cursor-based pagination and optional `modelProviders` filtering. - `thread/archive` — move a thread’s rollout file into the archived directory; returns `{}` on success. 
+- `thread/rollback` — drop the last N turns from the agent’s in-memory context and persist a rollback marker in the rollout so future resumes see the pruned history; returns the updated `thread` (with `turns` populated) on success. - `turn/start` — add user input to a thread and begin Codex generation; responds with the initial `turn` object and streams `turn/started`, `item/*`, and `turn/completed` notifications. - `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`. - `review/start` — kick off Codex’s automated reviewer for a thread; responds like `turn/start` and emits `item/started`/`item/completed` notifications with `enteredReviewMode` and `exitedReviewMode` items, plus a final assistant `agentMessage` containing the review. @@ -85,6 +88,7 @@ Example (from OpenAI's official VSCode extension): - `config/read` — fetch the effective config on disk after resolving config layering. - `config/value/write` — write a single config key/value to the user's config.toml on disk. - `config/batchWrite` — apply multiple config edits atomically to the user's config.toml on disk. +- `configRequirements/read` — fetch the loaded requirements allow-lists from `requirements.toml` and/or MDM (or `null` if none are configured). ### Example: Start or resume a thread @@ -162,7 +166,7 @@ Turns attach user input (text or images) to a thread and trigger Codex generatio - `{"type":"image","url":"https://…png"}` - `{"type":"localImage","path":"/tmp/screenshot.png"}` -You can optionally specify config overrides on the new turn. If specified, these settings become the default for subsequent turns on the same thread. +You can optionally specify config overrides on the new turn. If specified, these settings become the default for subsequent turns on the same thread. `outputSchema` applies only to the current turn. 
```json { "method": "turn/start", "id": 30, "params": { @@ -178,7 +182,14 @@ You can optionally specify config overrides on the new turn. If specified, these }, "model": "gpt-5.1-codex", "effort": "medium", - "summary": "concise" + "summary": "concise", + // Optional JSON Schema to constrain the final assistant message for this turn. + "outputSchema": { + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"], + "additionalProperties": false + } } } { "id": 30, "result": { "turn": { "id": "turn_456", @@ -188,6 +199,26 @@ You can optionally specify config overrides on the new turn. If specified, these } } } ``` +### Example: Start a turn (invoke a skill) + +Invoke a skill explicitly by including `$<skill-name>` in the text input and adding a `skill` input item alongside it. + +```json +{ "method": "turn/start", "id": 33, "params": { + "threadId": "thr_123", + "input": [ + { "type": "text", "text": "$skill-creator Add a new skill for triaging flaky CI and include step-by-step usage." }, + { "type": "skill", "name": "skill-creator", "path": "/Users/me/.codex/skills/skill-creator/SKILL.md" } + ] +} } +{ "id": 33, "result": { "turn": { + "id": "turn_457", + "status": "inProgress", + "items": [], + "error": null +} } } +``` + ### Example: Interrupt an active turn You can cancel a running Turn with `turn/interrupt`. @@ -397,6 +428,46 @@ Order of messages: UI guidance for IDEs: surface an approval dialog as soon as the request arrives. The turn will proceed after the server receives a response to the approval request. The terminal `item/completed` notification will be sent with the appropriate status. +## Skills + +Invoke a skill by including `$<skill-name>` in the text input. Add a `skill` input item (recommended) so the backend injects full skill instructions instead of relying on the model to resolve the name. 
+ +```json +{ + "method": "turn/start", + "id": 101, + "params": { + "threadId": "thread-1", + "input": [ + { "type": "text", "text": "$skill-creator Add a new skill for triaging flaky CI." }, + { "type": "skill", "name": "skill-creator", "path": "/Users/me/.codex/skills/skill-creator/SKILL.md" } + ] + } +} +``` + +If you omit the `skill` item, the model will still parse the `$<skill-name>` marker and try to locate the skill, which can add latency. + +Example: + +``` +$skill-creator Add a new skill for triaging flaky CI and include step-by-step usage. +``` + +Use `skills/list` to fetch the available skills (optionally scoped by `cwd` and/or with `forceReload`). + +```json +{ "method": "skills/list", "id": 25, "params": { + "cwd": "/Users/me/project", + "forceReload": false +} } +{ "id": 25, "result": { + "skills": [ + { "name": "skill-creator", "description": "Create or update a Codex skill" } + ] +} } +``` + ## Auth endpoints The JSON-RPC auth/account surface exposes request/response methods plus server-initiated notifications (no `id`). Use these to determine auth state, start or cancel logins, logout, and inspect ChatGPT rate limits. 
diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index ad0455a0587..d18a7878f47 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs +++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -1,15 +1,21 @@ use crate::codex_message_processor::ApiVersion; use crate::codex_message_processor::PendingInterrupts; +use crate::codex_message_processor::PendingRollbacks; use crate::codex_message_processor::TurnSummary; use crate::codex_message_processor::TurnSummaryStore; +use crate::codex_message_processor::read_event_msgs_from_rollout; +use crate::codex_message_processor::read_summary_from_rollout; +use crate::codex_message_processor::summary_to_thread; +use crate::error_code::INTERNAL_ERROR_CODE; +use crate::error_code::INVALID_REQUEST_ERROR_CODE; use crate::outgoing_message::OutgoingMessageSender; use codex_app_server_protocol::AccountRateLimitsUpdatedNotification; use codex_app_server_protocol::AgentMessageDeltaNotification; use codex_app_server_protocol::ApplyPatchApprovalParams; use codex_app_server_protocol::ApplyPatchApprovalResponse; -use codex_app_server_protocol::ApprovalDecision; use codex_app_server_protocol::CodexErrorInfo as V2CodexErrorInfo; use codex_app_server_protocol::CommandAction as V2ParsedCommand; +use codex_app_server_protocol::CommandExecutionApprovalDecision; use codex_app_server_protocol::CommandExecutionOutputDeltaNotification; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; @@ -20,6 +26,7 @@ use codex_app_server_protocol::ErrorNotification; use codex_app_server_protocol::ExecCommandApprovalParams; use codex_app_server_protocol::ExecCommandApprovalResponse; use codex_app_server_protocol::ExecPolicyAmendment as V2ExecPolicyAmendment; +use codex_app_server_protocol::FileChangeApprovalDecision; use codex_app_server_protocol::FileChangeOutputDeltaNotification; use 
codex_app_server_protocol::FileChangeRequestApprovalParams; use codex_app_server_protocol::FileChangeRequestApprovalResponse; @@ -27,6 +34,7 @@ use codex_app_server_protocol::FileUpdateChange; use codex_app_server_protocol::InterruptConversationResponse; use codex_app_server_protocol::ItemCompletedNotification; use codex_app_server_protocol::ItemStartedNotification; +use codex_app_server_protocol::JSONRPCErrorError; use codex_app_server_protocol::McpToolCallError; use codex_app_server_protocol::McpToolCallResult; use codex_app_server_protocol::McpToolCallStatus; @@ -40,6 +48,7 @@ use codex_app_server_protocol::ServerNotification; use codex_app_server_protocol::ServerRequestPayload; use codex_app_server_protocol::TerminalInteractionNotification; use codex_app_server_protocol::ThreadItem; +use codex_app_server_protocol::ThreadRollbackResponse; use codex_app_server_protocol::ThreadTokenUsage; use codex_app_server_protocol::ThreadTokenUsageUpdatedNotification; use codex_app_server_protocol::Turn; @@ -50,9 +59,11 @@ use codex_app_server_protocol::TurnInterruptResponse; use codex_app_server_protocol::TurnPlanStep; use codex_app_server_protocol::TurnPlanUpdatedNotification; use codex_app_server_protocol::TurnStatus; -use codex_core::CodexConversation; +use codex_app_server_protocol::build_turns_from_event_msgs; +use codex_core::CodexThread; use codex_core::parse_command::shlex_join; use codex_core::protocol::ApplyPatchApprovalRequestEvent; +use codex_core::protocol::CodexErrorInfo as CoreCodexErrorInfo; use codex_core::protocol::Event; use codex_core::protocol::EventMsg; use codex_core::protocol::ExecApprovalRequestEvent; @@ -66,7 +77,7 @@ use codex_core::protocol::TokenCountEvent; use codex_core::protocol::TurnDiffEvent; use codex_core::review_format::format_review_findings_block; use codex_core::review_prompts; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::plan_tool::UpdatePlanArgs; use 
codex_protocol::protocol::ReviewOutputEvent; use std::collections::HashMap; @@ -78,14 +89,17 @@ use tracing::error; type JsonValue = serde_json::Value; +#[allow(clippy::too_many_arguments)] pub(crate) async fn apply_bespoke_event_handling( event: Event, - conversation_id: ConversationId, - conversation: Arc, + conversation_id: ThreadId, + conversation: Arc, outgoing: Arc, pending_interrupts: PendingInterrupts, + pending_rollbacks: PendingRollbacks, turn_summary_store: TurnSummaryStore, api_version: ApiVersion, + fallback_model_provider: String, ) { let Event { id: event_turn_id, @@ -337,6 +351,26 @@ pub(crate) async fn apply_bespoke_event_handling( .await; } EventMsg::Error(ev) => { + let message = ev.message.clone(); + let codex_error_info = ev.codex_error_info.clone(); + + // If this error belongs to an in-flight `thread/rollback` request, fail that request + // (and clear pending state) so subsequent rollbacks are unblocked. + // + // Don't send a notification for this error. + if matches!( + codex_error_info, + Some(CoreCodexErrorInfo::ThreadRollbackFailed) + ) { + return handle_thread_rollback_failed( + conversation_id, + message, + &pending_rollbacks, + &outgoing, + ) + .await; + }; + let turn_error = TurnError { message: ev.message, codex_error_info: ev.codex_error_info.map(V2CodexErrorInfo::from), @@ -345,7 +379,7 @@ pub(crate) async fn apply_bespoke_event_handling( handle_error(conversation_id, turn_error.clone(), &turn_summary_store).await; outgoing .send_server_notification(ServerNotification::Error(ErrorNotification { - error: turn_error, + error: turn_error.clone(), will_retry: false, thread_id: conversation_id.to_string(), turn_id: event_turn_id.clone(), @@ -690,6 +724,58 @@ pub(crate) async fn apply_bespoke_event_handling( ) .await; } + EventMsg::ThreadRolledBack(_rollback_event) => { + let pending = { + let mut map = pending_rollbacks.lock().await; + map.remove(&conversation_id) + }; + + if let Some(request_id) = pending { + let rollout_path = 
conversation.rollout_path(); + let response = match read_summary_from_rollout( + rollout_path.as_path(), + fallback_model_provider.as_str(), + ) + .await + { + Ok(summary) => { + let mut thread = summary_to_thread(summary); + match read_event_msgs_from_rollout(rollout_path.as_path()).await { + Ok(events) => { + thread.turns = build_turns_from_event_msgs(&events); + ThreadRollbackResponse { thread } + } + Err(err) => { + let error = JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!( + "failed to load rollout `{}`: {err}", + rollout_path.display() + ), + data: None, + }; + outgoing.send_error(request_id, error).await; + return; + } + } + } + Err(err) => { + let error = JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!( + "failed to load rollout `{}`: {err}", + rollout_path.display() + ), + data: None, + }; + outgoing.send_error(request_id, error).await; + return; + } + }; + + outgoing.send_response(request_id, response).await; + } + } EventMsg::TurnDiff(turn_diff_event) => { handle_turn_diff( conversation_id, @@ -716,7 +802,7 @@ pub(crate) async fn apply_bespoke_event_handling( } async fn handle_turn_diff( - conversation_id: ConversationId, + conversation_id: ThreadId, event_turn_id: &str, turn_diff_event: TurnDiffEvent, api_version: ApiVersion, @@ -735,7 +821,7 @@ async fn handle_turn_diff( } async fn handle_turn_plan_update( - conversation_id: ConversationId, + conversation_id: ThreadId, event_turn_id: &str, plan_update_event: UpdatePlanArgs, api_version: ApiVersion, @@ -759,7 +845,7 @@ async fn handle_turn_plan_update( } async fn emit_turn_completed_with_status( - conversation_id: ConversationId, + conversation_id: ThreadId, event_turn_id: String, status: TurnStatus, error: Option, @@ -780,7 +866,7 @@ async fn emit_turn_completed_with_status( } async fn complete_file_change_item( - conversation_id: ConversationId, + conversation_id: ThreadId, item_id: String, changes: Vec, status: PatchApplyStatus, @@ -812,7 +898,7 @@ async fn 
complete_file_change_item( #[allow(clippy::too_many_arguments)] async fn complete_command_execution_item( - conversation_id: ConversationId, + conversation_id: ThreadId, turn_id: String, item_id: String, command: String, @@ -845,7 +931,7 @@ async fn complete_command_execution_item( async fn maybe_emit_raw_response_item_completed( api_version: ApiVersion, - conversation_id: ConversationId, + conversation_id: ThreadId, turn_id: &str, item: codex_protocol::models::ResponseItem, outgoing: &OutgoingMessageSender, @@ -865,7 +951,7 @@ async fn maybe_emit_raw_response_item_completed( } async fn find_and_remove_turn_summary( - conversation_id: ConversationId, + conversation_id: ThreadId, turn_summary_store: &TurnSummaryStore, ) -> TurnSummary { let mut map = turn_summary_store.lock().await; @@ -873,7 +959,7 @@ async fn find_and_remove_turn_summary( } async fn handle_turn_complete( - conversation_id: ConversationId, + conversation_id: ThreadId, event_turn_id: String, outgoing: &OutgoingMessageSender, turn_summary_store: &TurnSummaryStore, @@ -889,7 +975,7 @@ async fn handle_turn_complete( } async fn handle_turn_interrupted( - conversation_id: ConversationId, + conversation_id: ThreadId, event_turn_id: String, outgoing: &OutgoingMessageSender, turn_summary_store: &TurnSummaryStore, @@ -906,8 +992,33 @@ async fn handle_turn_interrupted( .await; } +async fn handle_thread_rollback_failed( + conversation_id: ThreadId, + message: String, + pending_rollbacks: &PendingRollbacks, + outgoing: &OutgoingMessageSender, +) { + let pending_rollback = { + let mut map = pending_rollbacks.lock().await; + map.remove(&conversation_id) + }; + + if let Some(request_id) = pending_rollback { + outgoing + .send_error( + request_id, + JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: message.clone(), + data: None, + }, + ) + .await; + } +} + async fn handle_token_count_event( - conversation_id: ConversationId, + conversation_id: ThreadId, turn_id: String, token_count_event: 
TokenCountEvent, outgoing: &OutgoingMessageSender, @@ -935,7 +1046,7 @@ async fn handle_token_count_event( } async fn handle_error( - conversation_id: ConversationId, + conversation_id: ThreadId, error: TurnError, turn_summary_store: &TurnSummaryStore, ) { @@ -946,7 +1057,7 @@ async fn handle_error( async fn on_patch_approval_response( event_turn_id: String, receiver: oneshot::Receiver, - codex: Arc, + codex: Arc, ) { let response = receiver.await; let value = match response { @@ -988,7 +1099,7 @@ async fn on_patch_approval_response( async fn on_exec_approval_response( event_turn_id: String, receiver: oneshot::Receiver, - conversation: Arc, + conversation: Arc, ) { let response = receiver.await; let value = match response { @@ -1083,14 +1194,29 @@ fn format_file_change_diff(change: &CoreFileChange) -> String { } } +fn map_file_change_approval_decision( + decision: FileChangeApprovalDecision, +) -> (ReviewDecision, Option) { + match decision { + FileChangeApprovalDecision::Accept => (ReviewDecision::Approved, None), + FileChangeApprovalDecision::AcceptForSession => (ReviewDecision::ApprovedForSession, None), + FileChangeApprovalDecision::Decline => { + (ReviewDecision::Denied, Some(PatchApplyStatus::Declined)) + } + FileChangeApprovalDecision::Cancel => { + (ReviewDecision::Abort, Some(PatchApplyStatus::Declined)) + } + } +} + #[allow(clippy::too_many_arguments)] async fn on_file_change_request_approval_response( event_turn_id: String, - conversation_id: ConversationId, + conversation_id: ThreadId, item_id: String, changes: Vec, receiver: oneshot::Receiver, - codex: Arc, + codex: Arc, outgoing: Arc, turn_summary_store: TurnSummaryStore, ) { @@ -1101,23 +1227,12 @@ async fn on_file_change_request_approval_response( .unwrap_or_else(|err| { error!("failed to deserialize FileChangeRequestApprovalResponse: {err}"); FileChangeRequestApprovalResponse { - decision: ApprovalDecision::Decline, + decision: FileChangeApprovalDecision::Decline, } }); - let (decision, 
completion_status) = match response.decision { - ApprovalDecision::Accept - | ApprovalDecision::AcceptForSession - | ApprovalDecision::AcceptWithExecpolicyAmendment { .. } => { - (ReviewDecision::Approved, None) - } - ApprovalDecision::Decline => { - (ReviewDecision::Denied, Some(PatchApplyStatus::Declined)) - } - ApprovalDecision::Cancel => { - (ReviewDecision::Abort, Some(PatchApplyStatus::Declined)) - } - }; + let (decision, completion_status) = + map_file_change_approval_decision(response.decision); // Allow EventMsg::PatchApplyEnd to emit ItemCompleted for accepted patches. // Only short-circuit on declines/cancels/failures. (decision, completion_status) @@ -1155,13 +1270,13 @@ async fn on_file_change_request_approval_response( #[allow(clippy::too_many_arguments)] async fn on_command_execution_request_approval_response( event_turn_id: String, - conversation_id: ConversationId, + conversation_id: ThreadId, item_id: String, command: String, cwd: PathBuf, command_actions: Vec, receiver: oneshot::Receiver, - conversation: Arc, + conversation: Arc, outgoing: Arc, ) { let response = receiver.await; @@ -1171,16 +1286,18 @@ async fn on_command_execution_request_approval_response( .unwrap_or_else(|err| { error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}"); CommandExecutionRequestApprovalResponse { - decision: ApprovalDecision::Decline, + decision: CommandExecutionApprovalDecision::Decline, } }); let decision = response.decision; let (decision, completion_status) = match decision { - ApprovalDecision::Accept => (ReviewDecision::Approved, None), - ApprovalDecision::AcceptForSession => (ReviewDecision::ApprovedForSession, None), - ApprovalDecision::AcceptWithExecpolicyAmendment { + CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None), + CommandExecutionApprovalDecision::AcceptForSession => { + (ReviewDecision::ApprovedForSession, None) + } + CommandExecutionApprovalDecision::AcceptWithExecpolicyAmendment { 
execpolicy_amendment, } => ( ReviewDecision::ApprovedExecpolicyAmendment { @@ -1188,11 +1305,11 @@ async fn on_command_execution_request_approval_response( }, None, ), - ApprovalDecision::Decline => ( + CommandExecutionApprovalDecision::Decline => ( ReviewDecision::Denied, Some(CommandExecutionStatus::Declined), ), - ApprovalDecision::Cancel => ( + CommandExecutionApprovalDecision::Cancel => ( ReviewDecision::Abort, Some(CommandExecutionStatus::Declined), ), @@ -1332,9 +1449,17 @@ mod tests { Arc::new(Mutex::new(HashMap::new())) } + #[test] + fn file_change_accept_for_session_maps_to_approved_for_session() { + let (decision, completion_status) = + map_file_change_approval_decision(FileChangeApprovalDecision::AcceptForSession); + assert_eq!(decision, ReviewDecision::ApprovedForSession); + assert_eq!(completion_status, None); + } + #[tokio::test] async fn test_handle_error_records_message() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let turn_summary_store = new_turn_summary_store(); handle_error( @@ -1362,7 +1487,7 @@ mod tests { #[tokio::test] async fn test_handle_turn_complete_emits_completed_without_error() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let event_turn_id = "complete1".to_string(); let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); let outgoing = Arc::new(OutgoingMessageSender::new(tx)); @@ -1394,7 +1519,7 @@ mod tests { #[tokio::test] async fn test_handle_turn_interrupted_emits_interrupted_with_error() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let event_turn_id = "interrupt1".to_string(); let turn_summary_store = new_turn_summary_store(); handle_error( @@ -1436,7 +1561,7 @@ mod tests { #[tokio::test] async fn test_handle_turn_complete_emits_failed_with_error() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let 
event_turn_id = "complete_err1".to_string(); let turn_summary_store = new_turn_summary_store(); handle_error( @@ -1501,7 +1626,7 @@ mod tests { ], }; - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); handle_turn_plan_update( conversation_id, @@ -1535,7 +1660,7 @@ mod tests { #[tokio::test] async fn test_handle_token_count_event_emits_usage_and_rate_limits() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let turn_id = "turn-123".to_string(); let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); let outgoing = Arc::new(OutgoingMessageSender::new(tx)); @@ -1620,7 +1745,7 @@ mod tests { #[tokio::test] async fn test_handle_token_count_event_without_usage_info() -> Result<()> { - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let turn_id = "turn-456".to_string(); let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); let outgoing = Arc::new(OutgoingMessageSender::new(tx)); @@ -1654,7 +1779,7 @@ mod tests { }, }; - let thread_id = ConversationId::new().to_string(); + let thread_id = ThreadId::new().to_string(); let turn_id = "turn_1".to_string(); let notification = construct_mcp_tool_call_notification( begin_event.clone(), @@ -1684,8 +1809,8 @@ mod tests { #[tokio::test] async fn test_handle_turn_complete_emits_error_multiple_turns() -> Result<()> { // Conversation A will have two turns; Conversation B will have one turn. 
- let conversation_a = ConversationId::new(); - let conversation_b = ConversationId::new(); + let conversation_a = ThreadId::new(); + let conversation_b = ThreadId::new(); let turn_summary_store = new_turn_summary_store(); let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); @@ -1812,7 +1937,7 @@ mod tests { }, }; - let thread_id = ConversationId::new().to_string(); + let thread_id = ThreadId::new().to_string(); let turn_id = "turn_2".to_string(); let notification = construct_mcp_tool_call_notification( begin_event.clone(), @@ -1863,7 +1988,7 @@ mod tests { result: Ok(result), }; - let thread_id = ConversationId::new().to_string(); + let thread_id = ThreadId::new().to_string(); let turn_id = "turn_3".to_string(); let notification = construct_mcp_tool_call_end_notification( end_event.clone(), @@ -1906,7 +2031,7 @@ mod tests { result: Err("boom".to_string()), }; - let thread_id = ConversationId::new().to_string(); + let thread_id = ThreadId::new().to_string(); let turn_id = "turn_4".to_string(); let notification = construct_mcp_tool_call_end_notification( end_event.clone(), @@ -1940,7 +2065,7 @@ mod tests { let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); let outgoing = OutgoingMessageSender::new(tx); let unified_diff = "--- a\n+++ b\n".to_string(); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); handle_turn_diff( conversation_id, @@ -1975,7 +2100,7 @@ mod tests { async fn test_handle_turn_diff_is_noop_for_v1() -> Result<()> { let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY); let outgoing = OutgoingMessageSender::new(tx); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); handle_turn_diff( conversation_id, diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index d1804801d58..d17dc76b4b2 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -91,6 +91,7 @@ 
use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadListResponse; use codex_app_server_protocol::ThreadResumeParams; use codex_app_server_protocol::ThreadResumeResponse; +use codex_app_server_protocol::ThreadRollbackParams; use codex_app_server_protocol::ThreadStartParams; use codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::ThreadStartedNotification; @@ -107,14 +108,14 @@ use codex_app_server_protocol::UserSavedConfig; use codex_app_server_protocol::build_turns_from_event_msgs; use codex_backend_client::Client as BackendClient; use codex_core::AuthManager; -use codex_core::CodexConversation; -use codex_core::ConversationManager; +use codex_core::CodexThread; use codex_core::Cursor as RolloutCursor; use codex_core::INTERACTIVE_SESSION_SOURCES; use codex_core::InitialHistory; -use codex_core::NewConversation; +use codex_core::NewThread; use codex_core::RolloutRecorder; use codex_core::SessionMeta; +use codex_core::ThreadManager; use codex_core::auth::CLIENT_ID; use codex_core::auth::login_with_api_key; use codex_core::config::Config; @@ -126,7 +127,7 @@ use codex_core::default_client::get_codex_user_agent; use codex_core::exec::ExecParams; use codex_core::exec_env::create_env; use codex_core::features::Feature; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; use codex_core::git_info::git_diff_to_remote; use codex_core::mcp::collect_mcp_snapshot; use codex_core::mcp::group_tools_by_server; @@ -143,7 +144,7 @@ use codex_feedback::CodexFeedback; use codex_login::ServerOptions as LoginServerOptions; use codex_login::ShutdownHandle; use codex_login::run_login_server; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::items::TurnItem; use codex_protocol::models::ResponseItem; @@ -176,7 +177,9 @@ use tracing::warn; use uuid::Uuid; type PendingInterruptQueue = Vec<(RequestId, 
ApiVersion)>; -pub(crate) type PendingInterrupts = Arc>>; +pub(crate) type PendingInterrupts = Arc>>; + +pub(crate) type PendingRollbacks = Arc>>; /// Per-conversation accumulation of the latest states e.g. error message while a turn runs. #[derive(Default, Clone)] @@ -185,7 +188,7 @@ pub(crate) struct TurnSummary { pub(crate) last_error: Option, } -pub(crate) type TurnSummaryStore = Arc>>; +pub(crate) type TurnSummaryStore = Arc>>; const THREAD_LIST_DEFAULT_LIMIT: usize = 25; const THREAD_LIST_MAX_LIMIT: usize = 100; @@ -208,10 +211,10 @@ impl Drop for ActiveLogin { } } -/// Handles JSON-RPC messages for Codex conversations. +/// Handles JSON-RPC messages for Codex threads (and legacy conversation APIs). pub(crate) struct CodexMessageProcessor { auth_manager: Arc, - conversation_manager: Arc, + thread_manager: Arc, outgoing: Arc, codex_linux_sandbox_exe: Option, config: Arc, @@ -220,6 +223,8 @@ pub(crate) struct CodexMessageProcessor { active_login: Arc>>, // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives. pending_interrupts: PendingInterrupts, + // Queue of pending rollback requests per conversation. We reply when ThreadRollback arrives. + pending_rollbacks: PendingRollbacks, turn_summary_store: TurnSummaryStore, pending_fuzzy_searches: Arc>>>, feedback: CodexFeedback, @@ -232,33 +237,32 @@ pub(crate) enum ApiVersion { } impl CodexMessageProcessor { - async fn conversation_from_thread_id( + async fn load_thread( &self, thread_id: &str, - ) -> Result<(ConversationId, Arc), JSONRPCErrorError> { - // Resolve conversation id from v2 thread id string. - let conversation_id = - ConversationId::from_string(thread_id).map_err(|err| JSONRPCErrorError { - code: INVALID_REQUEST_ERROR_CODE, - message: format!("invalid thread id: {err}"), - data: None, - })?; + ) -> Result<(ThreadId, Arc), JSONRPCErrorError> { + // Resolve the core conversation handle from a v2 thread id string. 
+ let thread_id = ThreadId::from_string(thread_id).map_err(|err| JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("invalid thread id: {err}"), + data: None, + })?; - let conversation = self - .conversation_manager - .get_conversation(conversation_id) + let thread = self + .thread_manager + .get_thread(thread_id) .await .map_err(|_| JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, - message: format!("conversation not found: {conversation_id}"), + message: format!("thread not found: {thread_id}"), data: None, })?; - Ok((conversation_id, conversation)) + Ok((thread_id, thread)) } pub fn new( auth_manager: Arc, - conversation_manager: Arc, + thread_manager: Arc, outgoing: Arc, codex_linux_sandbox_exe: Option, config: Arc, @@ -267,7 +271,7 @@ impl CodexMessageProcessor { ) -> Self { Self { auth_manager, - conversation_manager, + thread_manager, outgoing, codex_linux_sandbox_exe, config, @@ -275,6 +279,7 @@ impl CodexMessageProcessor { conversation_listeners: HashMap::new(), active_login: Arc::new(Mutex::new(None)), pending_interrupts: Arc::new(Mutex::new(HashMap::new())), + pending_rollbacks: Arc::new(Mutex::new(HashMap::new())), turn_summary_store: Arc::new(Mutex::new(HashMap::new())), pending_fuzzy_searches: Arc::new(Mutex::new(HashMap::new())), feedback, @@ -365,6 +370,9 @@ impl CodexMessageProcessor { ClientRequest::ThreadArchive { request_id, params } => { self.thread_archive(request_id, params).await; } + ClientRequest::ThreadRollback { request_id, params } => { + self.thread_rollback(request_id, params).await; + } ClientRequest::ThreadList { request_id, params } => { self.thread_list(request_id, params).await; } @@ -387,19 +395,18 @@ impl CodexMessageProcessor { self.process_new_conversation(request_id, params).await; } ClientRequest::GetConversationSummary { request_id, params } => { - self.get_conversation_summary(request_id, params).await; + self.get_thread_summary(request_id, params).await; } ClientRequest::ListConversations { 
request_id, params } => { self.handle_list_conversations(request_id, params).await; } ClientRequest::ModelList { request_id, params } => { let outgoing = self.outgoing.clone(); - let conversation_manager = self.conversation_manager.clone(); + let thread_manager = self.thread_manager.clone(); let config = self.config.clone(); tokio::spawn(async move { - Self::list_models(outgoing, conversation_manager, config, request_id, params) - .await; + Self::list_models(outgoing, thread_manager, config, request_id, params).await; }); } ClientRequest::McpServerOauthLogin { request_id, params } => { @@ -442,7 +449,7 @@ impl CodexMessageProcessor { self.add_conversation_listener(request_id, params).await; } ClientRequest::RemoveConversationListener { request_id, params } => { - self.remove_conversation_listener(request_id, params).await; + self.remove_thread_listener(request_id, params).await; } ClientRequest::GitDiffToRemote { request_id, params } => { self.git_diff_to_origin(request_id, params.cwd).await; @@ -503,6 +510,9 @@ impl CodexMessageProcessor { | ClientRequest::ConfigBatchWrite { .. } => { warn!("Config request reached CodexMessageProcessor unexpectedly"); } + ClientRequest::ConfigRequirementsRead { .. 
} => { + warn!("ConfigRequirementsRead request reached CodexMessageProcessor unexpectedly"); + } ClientRequest::GetAccountRateLimits { request_id, params: _, @@ -1103,7 +1113,7 @@ impl CodexMessageProcessor { } async fn get_user_saved_config(&self, request_id: RequestId) { - let service = ConfigService::new(self.config.codex_home.clone(), Vec::new()); + let service = ConfigService::new_with_defaults(self.config.codex_home.clone()); let user_saved_config: UserSavedConfig = match service.load_user_saved_config().await { Ok(config) => config, Err(err) => { @@ -1246,14 +1256,14 @@ impl CodexMessageProcessor { cwd, approval_policy, sandbox: sandbox_mode, - config: cli_overrides, + config: request_overrides, base_instructions, developer_instructions, compact_prompt, include_apply_patch_tool, } = params; - let overrides = ConfigOverrides { + let typesafe_overrides = ConfigOverrides { model, config_profile: profile, cwd: cwd.clone().map(PathBuf::from), @@ -1270,15 +1280,21 @@ impl CodexMessageProcessor { // Persist windows sandbox feature. // TODO: persist default config in general. 
- let mut cli_overrides = cli_overrides.unwrap_or_default(); + let mut request_overrides = request_overrides.unwrap_or_default(); if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) { - cli_overrides.insert( + request_overrides.insert( "features.experimental_windows_sandbox".to_string(), serde_json::json!(true), ); } - let config = match derive_config_from_params(overrides, Some(cli_overrides)).await { + let config = match derive_config_from_params( + &self.cli_overrides, + Some(request_overrides), + typesafe_overrides, + ) + .await + { Ok(config) => config, Err(err) => { let error = JSONRPCErrorError { @@ -1291,15 +1307,15 @@ impl CodexMessageProcessor { } }; - match self.conversation_manager.new_conversation(config).await { - Ok(conversation_id) => { - let NewConversation { - conversation_id, + match self.thread_manager.start_thread(config).await { + Ok(new_thread) => { + let NewThread { + thread_id, session_configured, .. - } = conversation_id; + } = new_thread; let response = NewConversationResponse { - conversation_id, + conversation_id: thread_id, model: session_configured.model, reasoning_effort: session_configured.reasoning_effort, rollout_path: session_configured.rollout_path, @@ -1318,7 +1334,7 @@ impl CodexMessageProcessor { } async fn thread_start(&mut self, request_id: RequestId, params: ThreadStartParams) { - let overrides = self.build_thread_config_overrides( + let typesafe_overrides = self.build_thread_config_overrides( params.model, params.model_provider, params.cwd, @@ -1328,23 +1344,26 @@ impl CodexMessageProcessor { params.developer_instructions, ); - let config = match derive_config_from_params(overrides, params.config).await { - Ok(config) => config, - Err(err) => { - let error = JSONRPCErrorError { - code: INVALID_REQUEST_ERROR_CODE, - message: format!("error deriving config: {err}"), - data: None, - }; - self.outgoing.send_error(request_id, error).await; - return; - } - }; + let config = + match 
derive_config_from_params(&self.cli_overrides, params.config, typesafe_overrides) + .await + { + Ok(config) => config, + Err(err) => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("error deriving config: {err}"), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + }; - match self.conversation_manager.new_conversation(config).await { + match self.thread_manager.start_thread(config).await { Ok(new_conv) => { - let NewConversation { - conversation_id, + let NewThread { + thread_id, session_configured, .. } = new_conv; @@ -1352,7 +1371,7 @@ impl CodexMessageProcessor { let fallback_provider = self.config.model_provider_id.as_str(); // A bit hacky, but the summary contains a lot of useful information for the thread - // that unfortunately does not get returned from conversation_manager.new_conversation(). + // that unfortunately does not get returned from thread_manager.start_thread(). let thread = match read_summary_from_rollout( rollout_path.as_path(), fallback_provider, @@ -1364,7 +1383,7 @@ impl CodexMessageProcessor { self.send_internal_error( request_id, format!( - "failed to load rollout `{}` for conversation {conversation_id}: {err}", + "failed to load rollout `{}` for thread {thread_id}: {err}", rollout_path.display() ), ) @@ -1391,19 +1410,19 @@ impl CodexMessageProcessor { reasoning_effort: session_configured.reasoning_effort, }; - // Auto-attach a conversation listener when starting a thread. + // Auto-attach a thread listener when starting a thread. // Use the same behavior as the v1 API, with opt-in support for raw item events. 
if let Err(err) = self .attach_conversation_listener( - conversation_id, + thread_id, params.experimental_raw_events, ApiVersion::V2, ) .await { tracing::warn!( - "failed to attach listener for conversation {}: {}", - conversation_id, + "failed to attach listener for thread {}: {}", + thread_id, err.message ); } @@ -1452,7 +1471,7 @@ impl CodexMessageProcessor { } async fn thread_archive(&mut self, request_id: RequestId, params: ThreadArchiveParams) { - let conversation_id = match ConversationId::from_string(¶ms.thread_id) { + let thread_id = match ThreadId::from_string(¶ms.thread_id) { Ok(id) => id, Err(err) => { let error = JSONRPCErrorError { @@ -1465,44 +1484,83 @@ impl CodexMessageProcessor { } }; - let rollout_path = match find_conversation_path_by_id_str( - &self.config.codex_home, - &conversation_id.to_string(), - ) - .await - { - Ok(Some(p)) => p, - Ok(None) => { - let error = JSONRPCErrorError { - code: INVALID_REQUEST_ERROR_CODE, - message: format!("no rollout found for conversation id {conversation_id}"), - data: None, - }; - self.outgoing.send_error(request_id, error).await; - return; + let rollout_path = + match find_thread_path_by_id_str(&self.config.codex_home, &thread_id.to_string()).await + { + Ok(Some(p)) => p, + Ok(None) => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("no rollout found for thread id {thread_id}"), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + Err(err) => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("failed to locate thread id {thread_id}: {err}"), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + }; + + match self.archive_thread_common(thread_id, &rollout_path).await { + Ok(()) => { + let response = ThreadArchiveResponse {}; + self.outgoing.send_response(request_id, response).await; } Err(err) => { - let error = JSONRPCErrorError { - code: 
INVALID_REQUEST_ERROR_CODE, - message: format!("failed to locate conversation id {conversation_id}: {err}"), - data: None, - }; + self.outgoing.send_error(request_id, err).await; + } + } + } + + async fn thread_rollback(&mut self, request_id: RequestId, params: ThreadRollbackParams) { + let ThreadRollbackParams { + thread_id, + num_turns, + } = params; + + if num_turns == 0 { + self.send_invalid_request_error(request_id, "numTurns must be >= 1".to_string()) + .await; + return; + } + + let (thread_id, thread) = match self.load_thread(&thread_id).await { + Ok(v) => v, + Err(error) => { self.outgoing.send_error(request_id, error).await; return; } }; - match self - .archive_conversation_common(conversation_id, &rollout_path) - .await { - Ok(()) => { - let response = ThreadArchiveResponse {}; - self.outgoing.send_response(request_id, response).await; - } - Err(err) => { - self.outgoing.send_error(request_id, err).await; + let mut map = self.pending_rollbacks.lock().await; + if map.contains_key(&thread_id) { + self.send_invalid_request_error( + request_id, + "rollback already in progress for this thread".to_string(), + ) + .await; + return; } + + map.insert(thread_id, request_id.clone()); + } + + if let Err(err) = thread.submit(Op::ThreadRollback { num_turns }).await { + // No ThreadRollback event will arrive if an error occurs. + // Clean up and reply immediately. 
+ let mut map = self.pending_rollbacks.lock().await; + map.remove(&thread_id); + + self.send_internal_error(request_id, format!("failed to start rollback: {err}")) + .await; } } @@ -1518,7 +1576,7 @@ impl CodexMessageProcessor { .unwrap_or(THREAD_LIST_DEFAULT_LIMIT) .clamp(1, THREAD_LIST_MAX_LIMIT); let (summaries, next_cursor) = match self - .list_conversations_common(requested_page_size, cursor, model_providers) + .list_threads_common(requested_page_size, cursor, model_providers) .await { Ok(r) => r, @@ -1543,7 +1601,7 @@ impl CodexMessageProcessor { cwd, approval_policy, sandbox, - config: cli_overrides, + config: request_overrides, base_instructions, developer_instructions, } = params; @@ -1553,12 +1611,12 @@ impl CodexMessageProcessor { || cwd.is_some() || approval_policy.is_some() || sandbox.is_some() - || cli_overrides.is_some() + || request_overrides.is_some() || base_instructions.is_some() || developer_instructions.is_some(); let config = if overrides_requested { - let overrides = self.build_thread_config_overrides( + let typesafe_overrides = self.build_thread_config_overrides( model, model_provider, cwd, @@ -1567,7 +1625,13 @@ impl CodexMessageProcessor { base_instructions, developer_instructions, ); - match derive_config_from_params(overrides, cli_overrides).await { + match derive_config_from_params( + &self.cli_overrides, + request_overrides, + typesafe_overrides, + ) + .await + { Ok(config) => config, Err(err) => { let error = JSONRPCErrorError { @@ -1583,7 +1647,7 @@ impl CodexMessageProcessor { self.config.as_ref().clone() }; - let conversation_history = if let Some(history) = history { + let thread_history = if let Some(history) = history { if history.is_empty() { self.send_invalid_request_error( request_id, @@ -1606,7 +1670,7 @@ impl CodexMessageProcessor { } } } else { - let existing_conversation_id = match ConversationId::from_string(&thread_id) { + let existing_thread_id = match ThreadId::from_string(&thread_id) { Ok(id) => id, Err(err) => { let 
error = JSONRPCErrorError { @@ -1619,9 +1683,9 @@ impl CodexMessageProcessor { } }; - let path = match find_conversation_path_by_id_str( + let path = match find_thread_path_by_id_str( &self.config.codex_home, - &existing_conversation_id.to_string(), + &existing_thread_id.to_string(), ) .await { @@ -1629,7 +1693,7 @@ impl CodexMessageProcessor { Ok(None) => { self.send_invalid_request_error( request_id, - format!("no rollout found for conversation id {existing_conversation_id}"), + format!("no rollout found for thread id {existing_thread_id}"), ) .await; return; @@ -1637,9 +1701,7 @@ impl CodexMessageProcessor { Err(err) => { self.send_invalid_request_error( request_id, - format!( - "failed to locate conversation id {existing_conversation_id}: {err}" - ), + format!("failed to locate thread id {existing_thread_id}: {err}"), ) .await; return; @@ -1662,16 +1724,12 @@ impl CodexMessageProcessor { let fallback_model_provider = config.model_provider_id.clone(); match self - .conversation_manager - .resume_conversation_with_history( - config, - conversation_history, - self.auth_manager.clone(), - ) + .thread_manager + .resume_thread_with_history(config, thread_history, self.auth_manager.clone()) .await { - Ok(NewConversation { - conversation_id, + Ok(NewThread { + thread_id, session_configured, .. }) => { @@ -1680,14 +1738,14 @@ impl CodexMessageProcessor { initial_messages, .. } = session_configured; - // Auto-attach a conversation listener when resuming a thread. + // Auto-attach a thread listener when resuming a thread. 
if let Err(err) = self - .attach_conversation_listener(conversation_id, false, ApiVersion::V2) + .attach_conversation_listener(thread_id, false, ApiVersion::V2) .await { tracing::warn!( - "failed to attach listener for conversation {}: {}", - conversation_id, + "failed to attach listener for thread {}: {}", + thread_id, err.message ); } @@ -1703,7 +1761,7 @@ impl CodexMessageProcessor { self.send_internal_error( request_id, format!( - "failed to load rollout `{}` for conversation {conversation_id}: {err}", + "failed to load rollout `{}` for thread {thread_id}: {err}", rollout_path.display() ), ) @@ -1738,7 +1796,7 @@ impl CodexMessageProcessor { } } - async fn get_conversation_summary( + async fn get_thread_summary( &self, request_id: RequestId, params: GetConversationSummaryParams, @@ -1751,8 +1809,8 @@ impl CodexMessageProcessor { rollout_path } } - GetConversationSummaryParams::ConversationId { conversation_id } => { - match codex_core::find_conversation_path_by_id_str( + GetConversationSummaryParams::ThreadId { conversation_id } => { + match codex_core::find_thread_path_by_id_str( &self.config.codex_home, &conversation_id.to_string(), ) @@ -1811,7 +1869,7 @@ impl CodexMessageProcessor { .clamp(1, THREAD_LIST_MAX_LIMIT); match self - .list_conversations_common(requested_page_size, cursor, model_providers) + .list_threads_common(requested_page_size, cursor, model_providers) .await { Ok((items, next_cursor)) => { @@ -1824,7 +1882,7 @@ impl CodexMessageProcessor { }; } - async fn list_conversations_common( + async fn list_threads_common( &self, requested_page_size: usize, cursor: Option, @@ -1850,7 +1908,7 @@ impl CodexMessageProcessor { while remaining > 0 { let page_size = remaining.min(THREAD_LIST_MAX_LIMIT); - let page = RolloutRecorder::list_conversations( + let page = RolloutRecorder::list_threads( &self.config.codex_home, page_size, cursor_obj.as_ref(), @@ -1861,7 +1919,7 @@ impl CodexMessageProcessor { .await .map_err(|err| JSONRPCErrorError { code: 
INTERNAL_ERROR_CODE, - message: format!("failed to list conversations: {err}"), + message: format!("failed to list threads: {err}"), data: None, })?; @@ -1917,7 +1975,7 @@ impl CodexMessageProcessor { async fn list_models( outgoing: Arc, - conversation_manager: Arc, + thread_manager: Arc, config: Arc, request_id: RequestId, params: ModelListParams, @@ -1925,7 +1983,7 @@ impl CodexMessageProcessor { let ModelListParams { limit, cursor } = params; let mut config = (*config).clone(); config.features.enable(Feature::RemoteModels); - let models = supported_models(conversation_manager, &config).await; + let models = supported_models(thread_manager, &config).await; let total = models.len(); if total == 0 { @@ -2197,7 +2255,7 @@ impl CodexMessageProcessor { cwd, approval_policy, sandbox: sandbox_mode, - config: cli_overrides, + config: request_overrides, base_instructions, developer_instructions, compact_prompt, @@ -2205,15 +2263,15 @@ impl CodexMessageProcessor { } = overrides; // Persist windows sandbox feature. 
- let mut cli_overrides = cli_overrides.unwrap_or_default(); + let mut request_overrides = request_overrides.unwrap_or_default(); if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) { - cli_overrides.insert( + request_overrides.insert( "features.experimental_windows_sandbox".to_string(), serde_json::json!(true), ); } - let overrides = ConfigOverrides { + let typesafe_overrides = ConfigOverrides { model, config_profile: profile, cwd: cwd.map(PathBuf::from), @@ -2228,7 +2286,12 @@ impl CodexMessageProcessor { ..Default::default() }; - derive_config_from_params(overrides, Some(cli_overrides)).await + derive_config_from_params( + &self.cli_overrides, + Some(request_overrides), + typesafe_overrides, + ) + .await } None => Ok(self.config.as_ref().clone()), }; @@ -2244,7 +2307,7 @@ impl CodexMessageProcessor { } }; - let conversation_history = if let Some(path) = path { + let thread_history = if let Some(path) = path { match RolloutRecorder::get_rollout_history(&path).await { Ok(initial_history) => initial_history, Err(err) => { @@ -2257,11 +2320,8 @@ impl CodexMessageProcessor { } } } else if let Some(conversation_id) = conversation_id { - match find_conversation_path_by_id_str( - &self.config.codex_home, - &conversation_id.to_string(), - ) - .await + match find_thread_path_by_id_str(&self.config.codex_home, &conversation_id.to_string()) + .await { Ok(Some(found_path)) => { match RolloutRecorder::get_rollout_history(&found_path).await { @@ -2313,16 +2373,12 @@ impl CodexMessageProcessor { }; match self - .conversation_manager - .resume_conversation_with_history( - config, - conversation_history, - self.auth_manager.clone(), - ) + .thread_manager + .resume_thread_with_history(config, thread_history, self.auth_manager.clone()) .await { - Ok(NewConversation { - conversation_id, + Ok(NewThread { + thread_id, session_configured, .. 
}) => { @@ -2343,9 +2399,9 @@ impl CodexMessageProcessor { .initial_messages .map(|msgs| msgs.into_iter().collect()); - // Reply with conversation id + model and initial messages (when present) + // Reply with thread id + model and initial messages (when present) let response = ResumeConversationResponse { - conversation_id, + conversation_id: thread_id, model: session_configured.model.clone(), initial_messages, rollout_path: session_configured.rollout_path.clone(), @@ -2387,32 +2443,26 @@ impl CodexMessageProcessor { params: ArchiveConversationParams, ) { let ArchiveConversationParams { - conversation_id, + conversation_id: thread_id, rollout_path, } = params; - match self - .archive_conversation_common(conversation_id, &rollout_path) - .await - { + match self.archive_thread_common(thread_id, &rollout_path).await { Ok(()) => { - tracing::info!("thread/archive succeeded for {conversation_id}"); + tracing::info!("thread/archive succeeded for {thread_id}"); let response = ArchiveConversationResponse {}; self.outgoing.send_response(request_id, response).await; } Err(err) => { - tracing::warn!( - "thread/archive failed for {conversation_id}: {}", - err.message - ); + tracing::warn!("thread/archive failed for {thread_id}: {}", err.message); self.outgoing.send_error(request_id, err).await; } } } - async fn archive_conversation_common( + async fn archive_thread_common( &mut self, - conversation_id: ConversationId, + thread_id: ThreadId, rollout_path: &Path, ) -> Result<(), JSONRPCErrorError> { // Verify rollout_path is under sessions dir. @@ -2424,7 +2474,7 @@ impl CodexMessageProcessor { return Err(JSONRPCErrorError { code: INTERNAL_ERROR_CODE, message: format!( - "failed to archive conversation: unable to resolve sessions directory: {err}" + "failed to archive thread: unable to resolve sessions directory: {err}" ), data: None, }); @@ -2446,8 +2496,8 @@ impl CodexMessageProcessor { }); }; - // Verify file name matches conversation id. 
- let required_suffix = format!("{conversation_id}.jsonl"); + // Verify file name matches thread id. + let required_suffix = format!("{thread_id}.jsonl"); let Some(file_name) = canonical_rollout_path.file_name().map(OsStr::to_owned) else { return Err(JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, @@ -2465,20 +2515,16 @@ impl CodexMessageProcessor { return Err(JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, message: format!( - "rollout path `{}` does not match conversation id {conversation_id}", + "rollout path `{}` does not match thread id {thread_id}", rollout_path.display() ), data: None, }); } - // If the conversation is active, request shutdown and wait briefly. - if let Some(conversation) = self - .conversation_manager - .remove_conversation(&conversation_id) - .await - { - info!("conversation {conversation_id} was active; shutting down"); + // If the thread is active, request shutdown and wait briefly. + if let Some(conversation) = self.thread_manager.remove_thread(&thread_id).await { + info!("thread {thread_id} was active; shutting down"); let conversation_clone = conversation.clone(); let notify = Arc::new(tokio::sync::Notify::new()); let notify_clone = notify.clone(); @@ -2513,7 +2559,7 @@ impl CodexMessageProcessor { // Normal shutdown: proceed with archive. } _ = tokio::time::sleep(Duration::from_secs(10)) => { - warn!("conversation {conversation_id} shutdown timed out; proceeding with archive"); + warn!("thread {thread_id} shutdown timed out; proceeding with archive"); // Wake any waiter; use notify_waiters to avoid missing the signal. 
notify.notify_waiters(); // Perhaps we lost a shutdown race, so let's continue to @@ -2522,7 +2568,7 @@ impl CodexMessageProcessor { } } Err(err) => { - error!("failed to submit Shutdown to conversation {conversation_id}: {err}"); + error!("failed to submit Shutdown to thread {thread_id}: {err}"); notify.notify_waiters(); } } @@ -2542,7 +2588,7 @@ impl CodexMessageProcessor { result.map_err(|err| JSONRPCErrorError { code: INTERNAL_ERROR_CODE, - message: format!("failed to archive conversation: {err}"), + message: format!("failed to archive thread: {err}"), data: None, }) } @@ -2552,11 +2598,7 @@ impl CodexMessageProcessor { conversation_id, items, } = params; - let Ok(conversation) = self - .conversation_manager - .get_conversation(conversation_id) - .await - else { + let Ok(conversation) = self.thread_manager.get_thread(conversation_id).await else { let error = JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, message: format!("conversation not found: {conversation_id}"), @@ -2579,6 +2621,7 @@ impl CodexMessageProcessor { let _ = conversation .submit(Op::UserInput { items: mapped_items, + final_output_json_schema: None, }) .await; @@ -2598,13 +2641,10 @@ impl CodexMessageProcessor { model, effort, summary, + output_schema, } = params; - let Ok(conversation) = self - .conversation_manager - .get_conversation(conversation_id) - .await - else { + let Ok(conversation) = self.thread_manager.get_thread(conversation_id).await else { let error = JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, message: format!("conversation not found: {conversation_id}"), @@ -2632,7 +2672,7 @@ impl CodexMessageProcessor { model, effort, summary, - final_output_json_schema: None, + final_output_json_schema: output_schema, }) .await; @@ -2649,20 +2689,18 @@ impl CodexMessageProcessor { cwds }; - let skills_manager = self.conversation_manager.skills_manager(); - let data = cwds - .into_iter() - .map(|cwd| { - let outcome = skills_manager.skills_for_cwd_with_options(&cwd, 
force_reload); - let errors = errors_to_info(&outcome.errors); - let skills = skills_to_info(&outcome.skills); - codex_app_server_protocol::SkillsListEntry { - cwd, - skills, - errors, - } - }) - .collect(); + let skills_manager = self.thread_manager.skills_manager(); + let mut data = Vec::new(); + for cwd in cwds { + let outcome = skills_manager.skills_for_cwd(&cwd, force_reload).await; + let errors = errors_to_info(&outcome.errors); + let skills = skills_to_info(&outcome.skills); + data.push(codex_app_server_protocol::SkillsListEntry { + cwd, + skills, + errors, + }); + } self.outgoing .send_response(request_id, SkillsListResponse { data }) .await; @@ -2674,11 +2712,7 @@ impl CodexMessageProcessor { params: InterruptConversationParams, ) { let InterruptConversationParams { conversation_id } = params; - let Ok(conversation) = self - .conversation_manager - .get_conversation(conversation_id) - .await - else { + let Ok(conversation) = self.thread_manager.get_thread(conversation_id).await else { let error = JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, message: format!("conversation not found: {conversation_id}"), @@ -2701,7 +2735,7 @@ impl CodexMessageProcessor { } async fn turn_start(&self, request_id: RequestId, params: TurnStartParams) { - let (_, conversation) = match self.conversation_from_thread_id(¶ms.thread_id).await { + let (_, thread) = match self.load_thread(¶ms.thread_id).await { Ok(v) => v, Err(error) => { self.outgoing.send_error(request_id, error).await; @@ -2725,7 +2759,7 @@ impl CodexMessageProcessor { // If any overrides are provided, update the session turn context first. if has_any_overrides { - let _ = conversation + let _ = thread .submit(Op::OverrideTurnContext { cwd: params.cwd, approval_policy: params.approval_policy.map(AskForApproval::to_core), @@ -2738,9 +2772,10 @@ impl CodexMessageProcessor { } // Start the turn by submitting the user input. Return its submission id as turn_id. 
- let turn_id = conversation + let turn_id = thread .submit(Op::UserInput { items: mapped_items, + final_output_json_schema: params.output_schema, }) .await; @@ -2823,14 +2858,12 @@ impl CodexMessageProcessor { async fn start_inline_review( &self, request_id: &RequestId, - parent_conversation: Arc, + parent_thread: Arc, review_request: ReviewRequest, display_text: &str, parent_thread_id: String, ) -> std::result::Result<(), JSONRPCErrorError> { - let turn_id = parent_conversation - .submit(Op::Review { review_request }) - .await; + let turn_id = parent_thread.submit(Op::Review { review_request }).await; match turn_id { Ok(turn_id) => { @@ -2855,56 +2888,54 @@ impl CodexMessageProcessor { async fn start_detached_review( &mut self, request_id: &RequestId, - parent_conversation_id: ConversationId, + parent_thread_id: ThreadId, review_request: ReviewRequest, display_text: &str, ) -> std::result::Result<(), JSONRPCErrorError> { - let rollout_path = find_conversation_path_by_id_str( - &self.config.codex_home, - &parent_conversation_id.to_string(), - ) - .await - .map_err(|err| JSONRPCErrorError { - code: INTERNAL_ERROR_CODE, - message: format!("failed to locate conversation id {parent_conversation_id}: {err}"), - data: None, - })? - .ok_or_else(|| JSONRPCErrorError { - code: INVALID_REQUEST_ERROR_CODE, - message: format!("no rollout found for conversation id {parent_conversation_id}"), - data: None, - })?; + let rollout_path = + find_thread_path_by_id_str(&self.config.codex_home, &parent_thread_id.to_string()) + .await + .map_err(|err| JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!("failed to locate thread id {parent_thread_id}: {err}"), + data: None, + })? 
+ .ok_or_else(|| JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("no rollout found for thread id {parent_thread_id}"), + data: None, + })?; let mut config = self.config.as_ref().clone(); config.model = Some(self.config.review_model.clone()); - let NewConversation { - conversation_id, - conversation, + let NewThread { + thread_id, + thread: review_thread, session_configured, .. } = self - .conversation_manager - .fork_conversation(usize::MAX, config, rollout_path) + .thread_manager + .fork_thread(usize::MAX, config, rollout_path) .await .map_err(|err| JSONRPCErrorError { code: INTERNAL_ERROR_CODE, - message: format!("error creating detached review conversation: {err}"), + message: format!("error creating detached review thread: {err}"), data: None, })?; if let Err(err) = self - .attach_conversation_listener(conversation_id, false, ApiVersion::V2) + .attach_conversation_listener(thread_id, false, ApiVersion::V2) .await { tracing::warn!( - "failed to attach listener for review conversation {}: {}", - conversation_id, + "failed to attach listener for review thread {}: {}", + thread_id, err.message ); } - let rollout_path = conversation.rollout_path(); + let rollout_path = review_thread.rollout_path(); let fallback_provider = self.config.model_provider_id.as_str(); match read_summary_from_rollout(rollout_path.as_path(), fallback_provider).await { Ok(summary) => { @@ -2916,14 +2947,14 @@ impl CodexMessageProcessor { } Err(err) => { tracing::warn!( - "failed to load summary for review conversation {}: {}", + "failed to load summary for review thread {}: {}", session_configured.session_id, err ); } } - let turn_id = conversation + let turn_id = review_thread .submit(Op::Review { review_request }) .await .map_err(|err| JSONRPCErrorError { @@ -2933,7 +2964,7 @@ impl CodexMessageProcessor { })?; let turn = Self::build_review_turn(turn_id, display_text); - let review_thread_id = conversation_id.to_string(); + let review_thread_id = 
thread_id.to_string(); self.emit_review_started(request_id, turn, review_thread_id.clone(), review_thread_id) .await; @@ -2946,14 +2977,13 @@ impl CodexMessageProcessor { target, delivery, } = params; - let (parent_conversation_id, parent_conversation) = - match self.conversation_from_thread_id(&thread_id).await { - Ok(v) => v, - Err(error) => { - self.outgoing.send_error(request_id, error).await; - return; - } - }; + let (parent_thread_id, parent_thread) = match self.load_thread(&thread_id).await { + Ok(v) => v, + Err(error) => { + self.outgoing.send_error(request_id, error).await; + return; + } + }; let (review_request, display_text) = match Self::review_request_from_target(target) { Ok(value) => value, @@ -2969,7 +2999,7 @@ impl CodexMessageProcessor { if let Err(err) = self .start_inline_review( &request_id, - parent_conversation, + parent_thread, review_request, display_text.as_str(), thread_id.clone(), @@ -2983,7 +3013,7 @@ impl CodexMessageProcessor { if let Err(err) = self .start_detached_review( &request_id, - parent_conversation_id, + parent_thread_id, review_request, display_text.as_str(), ) @@ -2998,25 +3028,24 @@ impl CodexMessageProcessor { async fn turn_interrupt(&mut self, request_id: RequestId, params: TurnInterruptParams) { let TurnInterruptParams { thread_id, .. } = params; - let (conversation_id, conversation) = - match self.conversation_from_thread_id(&thread_id).await { - Ok(v) => v, - Err(error) => { - self.outgoing.send_error(request_id, error).await; - return; - } - }; + let (thread_uuid, thread) = match self.load_thread(&thread_id).await { + Ok(v) => v, + Err(error) => { + self.outgoing.send_error(request_id, error).await; + return; + } + }; // Record the pending interrupt so we can reply when TurnAborted arrives. { let mut map = self.pending_interrupts.lock().await; - map.entry(conversation_id) + map.entry(thread_uuid) .or_default() .push((request_id, ApiVersion::V2)); } // Submit the interrupt; we'll respond upon TurnAborted. 
- let _ = conversation.submit(Op::Interrupt).await; + let _ = thread.submit(Op::Interrupt).await; } async fn add_conversation_listener( @@ -3042,7 +3071,7 @@ impl CodexMessageProcessor { } } - async fn remove_conversation_listener( + async fn remove_thread_listener( &mut self, request_id: RequestId, params: RemoveConversationListenerParams, @@ -3068,20 +3097,16 @@ impl CodexMessageProcessor { async fn attach_conversation_listener( &mut self, - conversation_id: ConversationId, + conversation_id: ThreadId, experimental_raw_events: bool, api_version: ApiVersion, ) -> Result { - let conversation = match self - .conversation_manager - .get_conversation(conversation_id) - .await - { + let conversation = match self.thread_manager.get_thread(conversation_id).await { Ok(conv) => conv, Err(_) => { return Err(JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, - message: format!("conversation not found: {conversation_id}"), + message: format!("thread not found: {conversation_id}"), data: None, }); } @@ -3094,8 +3119,10 @@ impl CodexMessageProcessor { let outgoing_for_task = self.outgoing.clone(); let pending_interrupts = self.pending_interrupts.clone(); + let pending_rollbacks = self.pending_rollbacks.clone(); let turn_summary_store = self.turn_summary_store.clone(); let api_version_for_task = api_version; + let fallback_model_provider = self.config.model_provider_id.clone(); tokio::spawn(async move { loop { tokio::select! 
{ @@ -3107,7 +3134,7 @@ impl CodexMessageProcessor { let event = match event { Ok(event) => event, Err(err) => { - tracing::warn!("conversation.next_event() failed with: {err}"); + tracing::warn!("thread.next_event() failed with: {err}"); break; } }; @@ -3151,8 +3178,10 @@ impl CodexMessageProcessor { conversation.clone(), outgoing_for_task.clone(), pending_interrupts.clone(), + pending_rollbacks.clone(), turn_summary_store.clone(), api_version_for_task, + fallback_model_provider.clone(), ) .await; } @@ -3232,7 +3261,7 @@ impl CodexMessageProcessor { } = params; let conversation_id = match thread_id.as_deref() { - Some(thread_id) => match ConversationId::from_string(thread_id) { + Some(thread_id) => match ThreadId::from_string(thread_id) { Ok(conversation_id) => Some(conversation_id), Err(err) => { let error = JSONRPCErrorError { @@ -3258,7 +3287,7 @@ impl CodexMessageProcessor { } else { None }; - let session_source = self.conversation_manager.session_source(); + let session_source = self.thread_manager.session_source(); let upload_result = tokio::task::spawn_blocking(move || { let rollout_path_ref = validated_rollout_path.as_deref(); @@ -3301,12 +3330,8 @@ impl CodexMessageProcessor { } } - async fn resolve_rollout_path(&self, conversation_id: ConversationId) -> Option { - match self - .conversation_manager - .get_conversation(conversation_id) - .await - { + async fn resolve_rollout_path(&self, conversation_id: ThreadId) -> Option { + match self.thread_manager.get_thread(conversation_id).await { Ok(conv) => Some(conv.rollout_path()), Err(_) => None, } @@ -3340,20 +3365,37 @@ fn errors_to_info( .collect() } +/// Derive the effective [`Config`] by layering three override sources. +/// +/// Precedence (lowest to highest): +/// - `cli_overrides`: process-wide startup `--config` flags. +/// - `request_overrides`: per-request dotted-path overrides (`params.config`), converted JSON->TOML. 
+/// - `typesafe_overrides`: Request objects such as `NewThreadParams` and +/// `ThreadStartParams` support a limited set of _explicit_ config overrides, so +/// `typesafe_overrides` is a `ConfigOverrides` derived from the respective request object. +/// Because the overrides are defined explicitly in the `*Params`, this takes priority over +/// the more general "bag of config options" provided by `cli_overrides` and `request_overrides`. async fn derive_config_from_params( - overrides: ConfigOverrides, - cli_overrides: Option>, + cli_overrides: &[(String, TomlValue)], + request_overrides: Option>, + typesafe_overrides: ConfigOverrides, ) -> std::io::Result { - let cli_overrides = cli_overrides - .unwrap_or_default() - .into_iter() - .map(|(k, v)| (k, json_to_toml(v))) - .collect(); + let merged_cli_overrides = cli_overrides + .iter() + .cloned() + .chain( + request_overrides + .unwrap_or_default() + .into_iter() + .map(|(k, v)| (k, json_to_toml(v))), + ) + .collect::>(); - Config::load_with_cli_overrides_and_harness_overrides(cli_overrides, overrides).await + Config::load_with_cli_overrides_and_harness_overrides(merged_cli_overrides, typesafe_overrides) + .await } -async fn read_summary_from_rollout( +pub(crate) async fn read_summary_from_rollout( path: &Path, fallback_provider: &str, ) -> std::io::Result { @@ -3412,6 +3454,24 @@ async fn read_summary_from_rollout( }) } +pub(crate) async fn read_event_msgs_from_rollout( + path: &Path, +) -> std::io::Result> { + let items = match RolloutRecorder::get_rollout_history(path).await? 
{ + InitialHistory::New => Vec::new(), + InitialHistory::Forked(items) => items, + InitialHistory::Resumed(resumed) => resumed.history, + }; + + Ok(items + .into_iter() + .filter_map(|item| match item { + RolloutItem::EventMsg(event) => Some(event), + _ => None, + }) + .collect()) +} + fn extract_conversation_summary( path: PathBuf, head: &[serde_json::Value], @@ -3473,7 +3533,7 @@ fn parse_datetime(timestamp: Option<&str>) -> Option> { }) } -fn summary_to_thread(summary: ConversationSummary) -> Thread { +pub(crate) fn summary_to_thread(summary: ConversationSummary) -> Thread { let ConversationSummary { conversation_id, path, @@ -3518,7 +3578,7 @@ mod tests { #[test] fn extract_conversation_summary_prefers_plain_user_messages() -> Result<()> { - let conversation_id = ConversationId::from_string("3f941c35-29b3-493b-b0a4-e25800d9aeb0")?; + let conversation_id = ThreadId::from_string("3f941c35-29b3-493b-b0a4-e25800d9aeb0")?; let timestamp = Some("2025-09-05T16:53:11.850Z".to_string()); let path = PathBuf::from("rollout.jsonl"); @@ -3582,7 +3642,7 @@ mod tests { let temp_dir = TempDir::new()?; let path = temp_dir.path().join("rollout.jsonl"); - let conversation_id = ConversationId::from_string("bfd12a78-5900-467b-9bc5-d3d35df08191")?; + let conversation_id = ThreadId::from_string("bfd12a78-5900-467b-9bc5-d3d35df08191")?; let timestamp = "2025-09-05T16:53:11.850Z".to_string(); let session_meta = SessionMeta { diff --git a/codex-rs/app-server/src/config_api.rs b/codex-rs/app-server/src/config_api.rs index 98e0f108e93..25434ce92bf 100644 --- a/codex-rs/app-server/src/config_api.rs +++ b/codex-rs/app-server/src/config_api.rs @@ -3,12 +3,18 @@ use crate::error_code::INVALID_REQUEST_ERROR_CODE; use codex_app_server_protocol::ConfigBatchWriteParams; use codex_app_server_protocol::ConfigReadParams; use codex_app_server_protocol::ConfigReadResponse; +use codex_app_server_protocol::ConfigRequirements; +use codex_app_server_protocol::ConfigRequirementsReadResponse; use 
codex_app_server_protocol::ConfigValueWriteParams; use codex_app_server_protocol::ConfigWriteErrorCode; use codex_app_server_protocol::ConfigWriteResponse; use codex_app_server_protocol::JSONRPCErrorError; +use codex_app_server_protocol::SandboxMode; use codex_core::config::ConfigService; use codex_core::config::ConfigServiceError; +use codex_core::config_loader::ConfigRequirementsToml; +use codex_core::config_loader::LoaderOverrides; +use codex_core::config_loader::SandboxModeRequirement as CoreSandboxModeRequirement; use serde_json::json; use std::path::PathBuf; use toml::Value as TomlValue; @@ -19,9 +25,13 @@ pub(crate) struct ConfigApi { } impl ConfigApi { - pub(crate) fn new(codex_home: PathBuf, cli_overrides: Vec<(String, TomlValue)>) -> Self { + pub(crate) fn new( + codex_home: PathBuf, + cli_overrides: Vec<(String, TomlValue)>, + loader_overrides: LoaderOverrides, + ) -> Self { Self { - service: ConfigService::new(codex_home, cli_overrides), + service: ConfigService::new(codex_home, cli_overrides, loader_overrides), } } @@ -32,6 +42,19 @@ impl ConfigApi { self.service.read(params).await.map_err(map_error) } + pub(crate) async fn config_requirements_read( + &self, + ) -> Result { + let requirements = self + .service + .read_requirements() + .await + .map_err(map_error)? 
+ .map(map_requirements_toml_to_api); + + Ok(ConfigRequirementsReadResponse { requirements }) + } + pub(crate) async fn write_value( &self, params: ConfigValueWriteParams, @@ -47,6 +70,32 @@ impl ConfigApi { } } +fn map_requirements_toml_to_api(requirements: ConfigRequirementsToml) -> ConfigRequirements { + ConfigRequirements { + allowed_approval_policies: requirements.allowed_approval_policies.map(|policies| { + policies + .into_iter() + .map(codex_app_server_protocol::AskForApproval::from) + .collect() + }), + allowed_sandbox_modes: requirements.allowed_sandbox_modes.map(|modes| { + modes + .into_iter() + .filter_map(map_sandbox_mode_requirement_to_api) + .collect() + }), + } +} + +fn map_sandbox_mode_requirement_to_api(mode: CoreSandboxModeRequirement) -> Option { + match mode { + CoreSandboxModeRequirement::ReadOnly => Some(SandboxMode::ReadOnly), + CoreSandboxModeRequirement::WorkspaceWrite => Some(SandboxMode::WorkspaceWrite), + CoreSandboxModeRequirement::DangerFullAccess => Some(SandboxMode::DangerFullAccess), + CoreSandboxModeRequirement::ExternalSandbox => None, + } +} + fn map_error(err: ConfigServiceError) -> JSONRPCErrorError { if let Some(code) = err.write_error_code() { return config_write_error(code, err.to_string()); @@ -68,3 +117,38 @@ fn config_write_error(code: ConfigWriteErrorCode, message: impl Into) -> })), } } + +#[cfg(test)] +mod tests { + use super::*; + use codex_protocol::protocol::AskForApproval as CoreAskForApproval; + use pretty_assertions::assert_eq; + + #[test] + fn map_requirements_toml_to_api_converts_core_enums() { + let requirements = ConfigRequirementsToml { + allowed_approval_policies: Some(vec![ + CoreAskForApproval::Never, + CoreAskForApproval::OnRequest, + ]), + allowed_sandbox_modes: Some(vec![ + CoreSandboxModeRequirement::ReadOnly, + CoreSandboxModeRequirement::ExternalSandbox, + ]), + }; + + let mapped = map_requirements_toml_to_api(requirements); + + assert_eq!( + mapped.allowed_approval_policies, + Some(vec![ + 
codex_app_server_protocol::AskForApproval::Never, + codex_app_server_protocol::AskForApproval::OnRequest, + ]) + ); + assert_eq!( + mapped.allowed_sandbox_modes, + Some(vec![SandboxMode::ReadOnly]), + ); + } +} diff --git a/codex-rs/app-server/src/lib.rs b/codex-rs/app-server/src/lib.rs index 224e0da10be..68663a991db 100644 --- a/codex-rs/app-server/src/lib.rs +++ b/codex-rs/app-server/src/lib.rs @@ -1,7 +1,8 @@ #![deny(clippy::print_stdout, clippy::print_stderr)] use codex_common::CliConfigOverrides; -use codex_core::config::Config; +use codex_core::config::ConfigBuilder; +use codex_core::config_loader::LoaderOverrides; use std::io::ErrorKind; use std::io::Result as IoResult; use std::path::PathBuf; @@ -42,6 +43,7 @@ const CHANNEL_CAPACITY: usize = 128; pub async fn run_main( codex_linux_sandbox_exe: Option, cli_config_overrides: CliConfigOverrides, + loader_overrides: LoaderOverrides, ) -> IoResult<()> { // Set up channels. let (incoming_tx, mut incoming_rx) = mpsc::channel::(CHANNEL_CAPACITY); @@ -78,7 +80,11 @@ pub async fn run_main( format!("error parsing -c overrides: {e}"), ) })?; - let config = Config::load_with_cli_overrides(cli_kv_overrides.clone()) + let loader_overrides_for_config_api = loader_overrides.clone(); + let config = ConfigBuilder::default() + .cli_overrides(cli_kv_overrides.clone()) + .loader_overrides(loader_overrides) + .build() .await .map_err(|e| { std::io::Error::new(ErrorKind::InvalidData, format!("error loading config: {e}")) @@ -120,11 +126,13 @@ pub async fn run_main( let processor_handle = tokio::spawn({ let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx); let cli_overrides: Vec<(String, TomlValue)> = cli_kv_overrides.clone(); + let loader_overrides = loader_overrides_for_config_api; let mut processor = MessageProcessor::new( outgoing_message_sender, codex_linux_sandbox_exe, std::sync::Arc::new(config), cli_overrides, + loader_overrides, feedback.clone(), ); async move { diff --git a/codex-rs/app-server/src/main.rs 
b/codex-rs/app-server/src/main.rs index 689ec0877a7..be57311e83d 100644 --- a/codex-rs/app-server/src/main.rs +++ b/codex-rs/app-server/src/main.rs @@ -1,10 +1,42 @@ use codex_app_server::run_main; use codex_arg0::arg0_dispatch_or_else; use codex_common::CliConfigOverrides; +use codex_core::config_loader::LoaderOverrides; +use std::path::PathBuf; + +// Debug-only test hook: lets integration tests point the server at a temporary +// managed config file without writing to /etc. +const MANAGED_CONFIG_PATH_ENV_VAR: &str = "CODEX_APP_SERVER_MANAGED_CONFIG_PATH"; fn main() -> anyhow::Result<()> { arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move { - run_main(codex_linux_sandbox_exe, CliConfigOverrides::default()).await?; + let managed_config_path = managed_config_path_from_debug_env(); + let loader_overrides = LoaderOverrides { + managed_config_path, + ..Default::default() + }; + + run_main( + codex_linux_sandbox_exe, + CliConfigOverrides::default(), + loader_overrides, + ) + .await?; Ok(()) }) } + +fn managed_config_path_from_debug_env() -> Option { + #[cfg(debug_assertions)] + { + if let Ok(value) = std::env::var(MANAGED_CONFIG_PATH_ENV_VAR) { + return if value.is_empty() { + None + } else { + Some(PathBuf::from(value)) + }; + } + } + + None +} diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 6a6cf5edb25..60e938bb18f 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -18,8 +18,9 @@ use codex_app_server_protocol::JSONRPCRequest; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::RequestId; use codex_core::AuthManager; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::config::Config; +use codex_core::config_loader::LoaderOverrides; use codex_core::default_client::USER_AGENT_SUFFIX; use codex_core::default_client::get_codex_user_agent; use codex_feedback::CodexFeedback; @@ -41,6 
+42,7 @@ impl MessageProcessor { codex_linux_sandbox_exe: Option, config: Arc, cli_overrides: Vec<(String, TomlValue)>, + loader_overrides: LoaderOverrides, feedback: CodexFeedback, ) -> Self { let outgoing = Arc::new(outgoing); @@ -49,20 +51,21 @@ impl MessageProcessor { false, config.cli_auth_credentials_store_mode, ); - let conversation_manager = Arc::new(ConversationManager::new( + let thread_manager = Arc::new(ThreadManager::new( + config.codex_home.clone(), auth_manager.clone(), SessionSource::VSCode, )); let codex_message_processor = CodexMessageProcessor::new( auth_manager, - conversation_manager, + thread_manager, outgoing.clone(), codex_linux_sandbox_exe, Arc::clone(&config), cli_overrides.clone(), feedback, ); - let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides); + let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides, loader_overrides); Self { outgoing, @@ -155,6 +158,12 @@ impl MessageProcessor { ClientRequest::ConfigBatchWrite { request_id, params } => { self.handle_config_batch_write(request_id, params).await; } + ClientRequest::ConfigRequirementsRead { + request_id, + params: _, + } => { + self.handle_config_requirements_read(request_id).await; + } other => { self.codex_message_processor.process_request(other).await; } @@ -207,4 +216,11 @@ impl MessageProcessor { Err(error) => self.outgoing.send_error(request_id, error).await, } } + + async fn handle_config_requirements_read(&self, request_id: RequestId) { + match self.config_api.config_requirements_read().await { + Ok(response) => self.outgoing.send_response(request_id, response).await, + Err(error) => self.outgoing.send_error(request_id, error).await, + } + } } diff --git a/codex-rs/app-server/src/models.rs b/codex-rs/app-server/src/models.rs index 21411603547..906108c50b7 100644 --- a/codex-rs/app-server/src/models.rs +++ b/codex-rs/app-server/src/models.rs @@ -2,19 +2,17 @@ use std::sync::Arc; use codex_app_server_protocol::Model; use 
codex_app_server_protocol::ReasoningEffortOption; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffortPreset; -pub async fn supported_models( - conversation_manager: Arc, - config: &Config, -) -> Vec { - conversation_manager +pub async fn supported_models(thread_manager: Arc, config: &Config) -> Vec { + thread_manager .list_models(config) .await .into_iter() + .filter(|preset| preset.show_in_picker) .map(model_from_preset) .collect() } diff --git a/codex-rs/app-server/tests/common/mcp_process.rs b/codex-rs/app-server/tests/common/mcp_process.rs index 98b2cabaaa0..f3ec682fb21 100644 --- a/codex-rs/app-server/tests/common/mcp_process.rs +++ b/codex-rs/app-server/tests/common/mcp_process.rs @@ -45,6 +45,7 @@ use codex_app_server_protocol::SetDefaultModelParams; use codex_app_server_protocol::ThreadArchiveParams; use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadResumeParams; +use codex_app_server_protocol::ThreadRollbackParams; use codex_app_server_protocol::ThreadStartParams; use codex_app_server_protocol::TurnInterruptParams; use codex_app_server_protocol::TurnStartParams; @@ -197,7 +198,7 @@ impl McpProcess { } /// Send a `removeConversationListener` JSON-RPC request. - pub async fn send_remove_conversation_listener_request( + pub async fn send_remove_thread_listener_request( &mut self, params: RemoveConversationListenerParams, ) -> anyhow::Result { @@ -316,6 +317,15 @@ impl McpProcess { self.send_request("thread/archive", params).await } + /// Send a `thread/rollback` JSON-RPC request. + pub async fn send_thread_rollback_request( + &mut self, + params: ThreadRollbackParams, + ) -> anyhow::Result { + let params = Some(serde_json::to_value(params)?); + self.send_request("thread/rollback", params).await + } + /// Send a `thread/list` JSON-RPC request. 
pub async fn send_thread_list_request( &mut self, diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index 0a977aa7c28..31b614ce5d8 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -15,7 +15,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { slug: preset.id.clone(), display_name: preset.display_name.clone(), description: Some(preset.description.clone()), - default_reasoning_level: preset.default_reasoning_effort, + default_reasoning_level: Some(preset.default_reasoning_effort), supported_reasoning_levels: preset.supported_reasoning_efforts.clone(), shell_type: ConfigShellToolType::ShellCommand, visibility: if preset.show_in_picker { @@ -26,19 +26,20 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { supported_in_api: true, priority, upgrade: preset.upgrade.as_ref().map(|u| u.id.clone()), - base_instructions: None, + base_instructions: "base instructions".to_string(), supports_reasoning_summaries: false, support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, truncation_policy: TruncationPolicyConfig::bytes(10_000), supports_parallel_tool_calls: false, - context_window: None, + context_window: Some(272_000), + auto_compact_token_limit: None, + effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), } } -// todo(aibrahim): fix the priorities to be the opposite here. /// Write a models_cache.json file to the codex home directory. /// This prevents ModelsManager from making network requests to refresh models. /// The cache will be treated as fresh (within TTL) and used instead of fetching from the network. 
@@ -49,14 +50,14 @@ pub fn write_models_cache(codex_home: &Path) -> std::io::Result<()> { .iter() .filter(|preset| preset.show_in_picker) .collect(); - // Convert presets to ModelInfo, assigning priorities (higher = earlier in list) - // Priority is used for sorting, so first model gets highest priority + // Convert presets to ModelInfo, assigning priorities (lower = earlier in list). + // Priority is used for sorting, so the first model gets the lowest priority. let models: Vec = presets .iter() .enumerate() .map(|(idx, preset)| { - // Higher priority = earlier in list, so reverse the index - let priority = (presets.len() - idx) as i32; + // Lower priority = earlier in list. + let priority = idx as i32; preset_to_info(preset, priority) }) .collect(); diff --git a/codex-rs/app-server/tests/common/rollout.rs b/codex-rs/app-server/tests/common/rollout.rs index 52035e4ede7..40aab240bd6 100644 --- a/codex-rs/app-server/tests/common/rollout.rs +++ b/codex-rs/app-server/tests/common/rollout.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::protocol::GitInfo; use codex_protocol::protocol::SessionMeta; use codex_protocol::protocol::SessionMetaLine; @@ -28,7 +28,7 @@ pub fn create_fake_rollout( ) -> Result { let uuid = Uuid::new_v4(); let uuid_str = uuid.to_string(); - let conversation_id = ConversationId::from_string(&uuid_str)?; + let conversation_id = ThreadId::from_string(&uuid_str)?; // sessions/YYYY/MM/DD derived from filename_ts (YYYY-MM-DDThh-mm-ss) let year = &filename_ts[0..4]; @@ -44,6 +44,7 @@ pub fn create_fake_rollout( id: conversation_id, timestamp: meta_rfc3339.to_string(), cwd: PathBuf::from("/"), + name: None, originator: "codex".to_string(), cli_version: "0.0.0".to_string(), instructions: None, diff --git a/codex-rs/app-server/tests/suite/archive_conversation.rs b/codex-rs/app-server/tests/suite/archive_thread.rs similarity index 100% rename from 
codex-rs/app-server/tests/suite/archive_conversation.rs rename to codex-rs/app-server/tests/suite/archive_thread.rs diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index be94dd822e1..a508bf88057 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -145,9 +145,7 @@ async fn test_codex_jsonrpc_conversation_flow() -> Result<()> { // 4) removeConversationListener let remove_listener_id = mcp - .send_remove_conversation_listener_request(RemoveConversationListenerParams { - subscription_id, - }) + .send_remove_thread_listener_request(RemoveConversationListenerParams { subscription_id }) .await?; let remove_listener_resp: JSONRPCResponse = timeout( DEFAULT_READ_TIMEOUT, @@ -305,6 +303,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> { model: "mock-model".to_string(), effort: Some(ReasoningEffort::Medium), summary: ReasoningSummary::Auto, + output_schema: None, }) .await?; // Acknowledge sendUserTurn @@ -418,6 +417,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<( model: model.clone(), effort: Some(ReasoningEffort::Medium), summary: ReasoningSummary::Auto, + output_schema: None, }) .await?; timeout( @@ -443,6 +443,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<( model: model.clone(), effort: Some(ReasoningEffort::Medium), summary: ReasoningSummary::Auto, + output_schema: None, }) .await?; timeout( diff --git a/codex-rs/app-server/tests/suite/create_conversation.rs b/codex-rs/app-server/tests/suite/create_thread.rs similarity index 100% rename from codex-rs/app-server/tests/suite/create_conversation.rs rename to codex-rs/app-server/tests/suite/create_thread.rs diff --git a/codex-rs/app-server/tests/suite/list_resume.rs b/codex-rs/app-server/tests/suite/list_resume.rs index 
34e737437ca..983553e06ec 100644 --- a/codex-rs/app-server/tests/suite/list_resume.rs +++ b/codex-rs/app-server/tests/suite/list_resume.rs @@ -6,7 +6,7 @@ use codex_app_server_protocol::JSONRPCNotification; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::ListConversationsParams; use codex_app_server_protocol::ListConversationsResponse; -use codex_app_server_protocol::NewConversationParams; // reused for overrides shape +use codex_app_server_protocol::NewConversationParams; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::ResumeConversationParams; use codex_app_server_protocol::ResumeConversationResponse; diff --git a/codex-rs/app-server/tests/suite/mod.rs b/codex-rs/app-server/tests/suite/mod.rs index 37f7659f422..41d6f83b957 100644 --- a/codex-rs/app-server/tests/suite/mod.rs +++ b/codex-rs/app-server/tests/suite/mod.rs @@ -1,12 +1,13 @@ -mod archive_conversation; +mod archive_thread; mod auth; mod codex_message_processor_flow; mod config; -mod create_conversation; +mod create_thread; mod fuzzy_file_search; mod interrupt; mod list_resume; mod login; +mod output_schema; mod send_message; mod set_default_model; mod user_agent; diff --git a/codex-rs/app-server/tests/suite/output_schema.rs b/codex-rs/app-server/tests/suite/output_schema.rs new file mode 100644 index 00000000000..4ec500a245c --- /dev/null +++ b/codex-rs/app-server/tests/suite/output_schema.rs @@ -0,0 +1,282 @@ +use anyhow::Result; +use app_test_support::McpProcess; +use app_test_support::to_response; +use codex_app_server_protocol::AddConversationListenerParams; +use codex_app_server_protocol::InputItem; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::NewConversationParams; +use codex_app_server_protocol::NewConversationResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::SendUserTurnParams; +use codex_app_server_protocol::SendUserTurnResponse; +use 
codex_core::protocol::AskForApproval; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::openai_models::ReasoningEffort; +use core_test_support::responses; +use core_test_support::skip_if_no_network; +use pretty_assertions::assert_eq; +use std::path::Path; +use tempfile::TempDir; +use tokio::time::timeout; + +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); + +#[tokio::test] +async fn send_user_turn_accepts_output_schema_v1() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let body = responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_assistant_message("msg-1", "Done"), + responses::ev_completed("resp-1"), + ]); + let response_mock = responses::mount_sse_once(&server, body).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let new_conv_id = mcp + .send_new_conversation_request(NewConversationParams { + ..Default::default() + }) + .await?; + let new_conv_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)), + ) + .await??; + let NewConversationResponse { + conversation_id, .. 
+ } = to_response::(new_conv_resp)?; + + let listener_id = mcp + .send_add_conversation_listener_request(AddConversationListenerParams { + conversation_id, + experimental_raw_events: false, + }) + .await?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(listener_id)), + ) + .await??; + + let output_schema = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + + let send_turn_id = mcp + .send_send_user_turn_request(SendUserTurnParams { + conversation_id, + items: vec![InputItem::Text { + text: "Hello".to_string(), + }], + cwd: codex_home.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: "mock-model".to_string(), + effort: Some(ReasoningEffort::Medium), + summary: ReasoningSummary::Auto, + output_schema: Some(output_schema.clone()), + }) + .await?; + let _send_turn_resp: SendUserTurnResponse = to_response::( + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)), + ) + .await??, + )?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await??; + + let request = response_mock.single_request(); + let payload = request.body_json(); + let text = payload.get("text").expect("request missing text field"); + let format = text + .get("format") + .expect("request missing text.format field"); + assert_eq!( + format, + &serde_json::json!({ + "name": "codex_output_schema", + "type": "json_schema", + "strict": true, + "schema": output_schema, + }) + ); + + Ok(()) +} + +#[tokio::test] +async fn send_user_turn_output_schema_is_per_turn_v1() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let body1 = responses::sse(vec![ + responses::ev_response_created("resp-1"), + 
responses::ev_assistant_message("msg-1", "Done"), + responses::ev_completed("resp-1"), + ]); + let response_mock1 = responses::mount_sse_once(&server, body1).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let new_conv_id = mcp + .send_new_conversation_request(NewConversationParams { + ..Default::default() + }) + .await?; + let new_conv_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)), + ) + .await??; + let NewConversationResponse { + conversation_id, .. + } = to_response::(new_conv_resp)?; + + let listener_id = mcp + .send_add_conversation_listener_request(AddConversationListenerParams { + conversation_id, + experimental_raw_events: false, + }) + .await?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(listener_id)), + ) + .await??; + + let output_schema = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + + let send_turn_id = mcp + .send_send_user_turn_request(SendUserTurnParams { + conversation_id, + items: vec![InputItem::Text { + text: "Hello".to_string(), + }], + cwd: codex_home.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: "mock-model".to_string(), + effort: Some(ReasoningEffort::Medium), + summary: ReasoningSummary::Auto, + output_schema: Some(output_schema.clone()), + }) + .await?; + let _send_turn_resp: SendUserTurnResponse = to_response::( + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)), + ) + .await??, + )?; + + timeout( + DEFAULT_READ_TIMEOUT, + 
mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await??; + + let payload1 = response_mock1.single_request().body_json(); + assert_eq!( + payload1.pointer("/text/format"), + Some(&serde_json::json!({ + "name": "codex_output_schema", + "type": "json_schema", + "strict": true, + "schema": output_schema, + })) + ); + + let body2 = responses::sse(vec![ + responses::ev_response_created("resp-2"), + responses::ev_assistant_message("msg-2", "Done"), + responses::ev_completed("resp-2"), + ]); + let response_mock2 = responses::mount_sse_once(&server, body2).await; + + let send_turn_id_2 = mcp + .send_send_user_turn_request(SendUserTurnParams { + conversation_id, + items: vec![InputItem::Text { + text: "Hello again".to_string(), + }], + cwd: codex_home.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: "mock-model".to_string(), + effort: Some(ReasoningEffort::Medium), + summary: ReasoningSummary::Auto, + output_schema: None, + }) + .await?; + let _send_turn_resp_2: SendUserTurnResponse = to_response::( + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id_2)), + ) + .await??, + )?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await??; + + let payload2 = response_mock2.single_request().body_json(); + assert_eq!(payload2.pointer("/text/format"), None); + + Ok(()) +} + +fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { + let config_toml = codex_home.join("config.toml"); + std::fs::write( + config_toml, + format!( + r#" +model = "mock-model" +approval_policy = "never" +sandbox_mode = "read-only" + +model_provider = "mock_provider" + +[model_providers.mock_provider] +name = "Mock provider for test" +base_url = "{server_uri}/v1" +wire_api = "responses" +request_max_retries = 0 +stream_max_retries = 0 +"# + ), + ) 
+} diff --git a/codex-rs/app-server/tests/suite/send_message.rs b/codex-rs/app-server/tests/suite/send_message.rs index 39b3a31a8ae..ed93f8a7f3e 100644 --- a/codex-rs/app-server/tests/suite/send_message.rs +++ b/codex-rs/app-server/tests/suite/send_message.rs @@ -13,7 +13,7 @@ use codex_app_server_protocol::NewConversationResponse; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::SendUserMessageParams; use codex_app_server_protocol::SendUserMessageResponse; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::RawResponseItemEvent; @@ -81,7 +81,7 @@ async fn test_send_message_success() -> Result<()> { #[expect(clippy::expect_used)] async fn send_message( message: &str, - conversation_id: ConversationId, + conversation_id: ThreadId, mcp: &mut McpProcess, ) -> Result<()> { // Now exercise sendUserMessage. @@ -220,7 +220,7 @@ async fn test_send_message_session_not_found() -> Result<()> { let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; - let unknown = ConversationId::new(); + let unknown = ThreadId::new(); let req_id = mcp .send_send_user_message_request(SendUserMessageParams { conversation_id: unknown, @@ -268,10 +268,7 @@ stream_max_retries = 0 } #[expect(clippy::expect_used)] -async fn read_raw_response_item( - mcp: &mut McpProcess, - conversation_id: ConversationId, -) -> ResponseItem { +async fn read_raw_response_item(mcp: &mut McpProcess, conversation_id: ThreadId) -> ResponseItem { loop { let raw_notification: JSONRPCNotification = timeout( DEFAULT_READ_TIMEOUT, diff --git a/codex-rs/app-server/tests/suite/v2/config_rpc.rs b/codex-rs/app-server/tests/suite/v2/config_rpc.rs index c0be58f50c7..18311d324b8 100644 --- a/codex-rs/app-server/tests/suite/v2/config_rpc.rs +++ b/codex-rs/app-server/tests/suite/v2/config_rpc.rs @@ -184,7 +184,10 @@ 
writable_roots = [{}] let mut mcp = McpProcess::new_with_env( codex_home.path(), - &[("CODEX_MANAGED_CONFIG_PATH", Some(&managed_path_str))], + &[( + "CODEX_APP_SERVER_MANAGED_CONFIG_PATH", + Some(&managed_path_str), + )], ) .await?; timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; diff --git a/codex-rs/app-server/tests/suite/v2/mod.rs b/codex-rs/app-server/tests/suite/v2/mod.rs index 16d2142b2eb..1ef00c6939d 100644 --- a/codex-rs/app-server/tests/suite/v2/mod.rs +++ b/codex-rs/app-server/tests/suite/v2/mod.rs @@ -1,11 +1,13 @@ mod account; mod config_rpc; mod model_list; +mod output_schema; mod rate_limits; mod review; mod thread_archive; mod thread_list; mod thread_resume; +mod thread_rollback; mod thread_start; mod turn_interrupt; mod turn_start; diff --git a/codex-rs/app-server/tests/suite/v2/model_list.rs b/codex-rs/app-server/tests/suite/v2/model_list.rs index e9fe70dbee5..c98da19345d 100644 --- a/codex-rs/app-server/tests/suite/v2/model_list.rs +++ b/codex-rs/app-server/tests/suite/v2/model_list.rs @@ -48,57 +48,32 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { let expected_models = vec![ Model { - id: "gpt-5.2".to_string(), - model: "gpt-5.2".to_string(), - display_name: "gpt-5.2".to_string(), - description: - "Latest frontier model with improvements across knowledge, reasoning and coding" - .to_string(), + id: "gpt-5.2-codex".to_string(), + model: "gpt-5.2-codex".to_string(), + display_name: "gpt-5.2-codex".to_string(), + description: "Latest frontier agentic coding model.".to_string(), supported_reasoning_efforts: vec![ ReasoningEffortOption { reasoning_effort: ReasoningEffort::Low, - description: "Balances speed with some reasoning; useful for straightforward \ - queries and short explanations" - .to_string(), + description: "Fast responses with lighter reasoning".to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::Medium, - description: "Provides a solid balance of reasoning depth and latency 
for \ - general-purpose tasks" + description: "Balances speed and reasoning depth for everyday tasks" .to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems" - .to_string(), + description: "Greater reasoning depth for complex problems".to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::XHigh, - description: "Extra high reasoning for complex problems".to_string(), + description: "Extra high reasoning depth for complex problems".to_string(), }, ], default_reasoning_effort: ReasoningEffort::Medium, is_default: true, }, - Model { - id: "gpt-5.1-codex-mini".to_string(), - model: "gpt-5.1-codex-mini".to_string(), - display_name: "gpt-5.1-codex-mini".to_string(), - description: "Optimized for codex. Cheaper, faster, but less capable.".to_string(), - supported_reasoning_efforts: vec![ - ReasoningEffortOption { - reasoning_effort: ReasoningEffort::Medium, - description: "Dynamically adjusts reasoning based on the task".to_string(), - }, - ReasoningEffortOption { - reasoning_effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems" - .to_string(), - }, - ], - default_reasoning_effort: ReasoningEffort::Medium, - is_default: false, - }, Model { id: "gpt-5.1-codex-max".to_string(), model: "gpt-5.1-codex-max".to_string(), @@ -127,23 +102,48 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { is_default: false, }, Model { - id: "gpt-5.2-codex".to_string(), - model: "gpt-5.2-codex".to_string(), - display_name: "gpt-5.2-codex".to_string(), - description: "Latest frontier agentic coding model.".to_string(), + id: "gpt-5.1-codex-mini".to_string(), + model: "gpt-5.1-codex-mini".to_string(), + display_name: "gpt-5.1-codex-mini".to_string(), + description: "Optimized for codex. 
Cheaper, faster, but less capable.".to_string(), + supported_reasoning_efforts: vec![ + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::Medium, + description: "Dynamically adjusts reasoning based on the task".to_string(), + }, + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::High, + description: "Maximizes reasoning depth for complex or ambiguous problems" + .to_string(), + }, + ], + default_reasoning_effort: ReasoningEffort::Medium, + is_default: false, + }, + Model { + id: "gpt-5.2".to_string(), + model: "gpt-5.2".to_string(), + display_name: "gpt-5.2".to_string(), + description: + "Latest frontier model with improvements across knowledge, reasoning and coding" + .to_string(), supported_reasoning_efforts: vec![ ReasoningEffortOption { reasoning_effort: ReasoningEffort::Low, - description: "Fast responses with lighter reasoning".to_string(), + description: "Balances speed with some reasoning; useful for straightforward \ + queries and short explanations" + .to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::Medium, - description: "Balances speed and reasoning depth for everyday tasks" + description: "Provides a solid balance of reasoning depth and latency for \ + general-purpose tasks" .to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::High, - description: "Greater reasoning depth for complex problems".to_string(), + description: "Maximizes reasoning depth for complex or ambiguous problems" + .to_string(), }, ReasoningEffortOption { reasoning_effort: ReasoningEffort::XHigh, @@ -187,7 +187,7 @@ async fn list_models_pagination_works() -> Result<()> { } = to_response::(first_response)?; assert_eq!(first_items.len(), 1); - assert_eq!(first_items[0].id, "gpt-5.2"); + assert_eq!(first_items[0].id, "gpt-5.2-codex"); let next_cursor = first_cursor.ok_or_else(|| anyhow!("cursor for second page"))?; let second_request = mcp @@ -209,7 +209,7 @@ async fn list_models_pagination_works() -> Result<()> { 
} = to_response::(second_response)?; assert_eq!(second_items.len(), 1); - assert_eq!(second_items[0].id, "gpt-5.1-codex-mini"); + assert_eq!(second_items[0].id, "gpt-5.1-codex-max"); let third_cursor = second_cursor.ok_or_else(|| anyhow!("cursor for third page"))?; let third_request = mcp @@ -231,7 +231,7 @@ async fn list_models_pagination_works() -> Result<()> { } = to_response::(third_response)?; assert_eq!(third_items.len(), 1); - assert_eq!(third_items[0].id, "gpt-5.1-codex-max"); + assert_eq!(third_items[0].id, "gpt-5.1-codex-mini"); let fourth_cursor = third_cursor.ok_or_else(|| anyhow!("cursor for fourth page"))?; let fourth_request = mcp @@ -253,7 +253,7 @@ async fn list_models_pagination_works() -> Result<()> { } = to_response::(fourth_response)?; assert_eq!(fourth_items.len(), 1); - assert_eq!(fourth_items[0].id, "gpt-5.2-codex"); + assert_eq!(fourth_items[0].id, "gpt-5.2"); assert!(fourth_cursor.is_none()); Ok(()) } diff --git a/codex-rs/app-server/tests/suite/v2/output_schema.rs b/codex-rs/app-server/tests/suite/v2/output_schema.rs new file mode 100644 index 00000000000..f23c0370377 --- /dev/null +++ b/codex-rs/app-server/tests/suite/v2/output_schema.rs @@ -0,0 +1,231 @@ +use anyhow::Result; +use app_test_support::McpProcess; +use app_test_support::to_response; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::ThreadStartParams; +use codex_app_server_protocol::ThreadStartResponse; +use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::TurnStartResponse; +use codex_app_server_protocol::UserInput as V2UserInput; +use core_test_support::responses; +use core_test_support::skip_if_no_network; +use pretty_assertions::assert_eq; +use std::path::Path; +use tempfile::TempDir; +use tokio::time::timeout; + +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); + +#[tokio::test] +async fn turn_start_accepts_output_schema_v2() -> 
Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let body = responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_assistant_message("msg-1", "Done"), + responses::ev_completed("resp-1"), + ]); + let response_mock = responses::mount_sse_once(&server, body).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; + + let output_schema = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Hello".to_string(), + }], + output_schema: Some(output_schema.clone()), + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let _turn: TurnStartResponse = to_response::(turn_resp)?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let request = response_mock.single_request(); + let payload = request.body_json(); + let text = payload.get("text").expect("request missing text field"); + let format = text + .get("format") + .expect("request missing text.format field"); + assert_eq!( + format, + &serde_json::json!({ + "name": "codex_output_schema", + 
"type": "json_schema", + "strict": true, + "schema": output_schema, + }) + ); + + Ok(()) +} + +#[tokio::test] +async fn turn_start_output_schema_is_per_turn_v2() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let body1 = responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_assistant_message("msg-1", "Done"), + responses::ev_completed("resp-1"), + ]); + let response_mock1 = responses::mount_sse_once(&server, body1).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. 
} = to_response::(thread_resp)?; + + let output_schema = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + + let turn_req_1 = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Hello".to_string(), + }], + output_schema: Some(output_schema.clone()), + ..Default::default() + }) + .await?; + let turn_resp_1: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req_1)), + ) + .await??; + let _turn: TurnStartResponse = to_response::(turn_resp_1)?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let payload1 = response_mock1.single_request().body_json(); + assert_eq!( + payload1.pointer("/text/format"), + Some(&serde_json::json!({ + "name": "codex_output_schema", + "type": "json_schema", + "strict": true, + "schema": output_schema, + })) + ); + + let body2 = responses::sse(vec![ + responses::ev_response_created("resp-2"), + responses::ev_assistant_message("msg-2", "Done"), + responses::ev_completed("resp-2"), + ]); + let response_mock2 = responses::mount_sse_once(&server, body2).await; + + let turn_req_2 = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Hello again".to_string(), + }], + output_schema: None, + ..Default::default() + }) + .await?; + let turn_resp_2: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req_2)), + ) + .await??; + let _turn: TurnStartResponse = to_response::(turn_resp_2)?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let payload2 = response_mock2.single_request().body_json(); + 
assert_eq!(payload2.pointer("/text/format"), None); + + Ok(()) +} + +fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { + let config_toml = codex_home.join("config.toml"); + std::fs::write( + config_toml, + format!( + r#" +model = "mock-model" +approval_policy = "never" +sandbox_mode = "read-only" + +model_provider = "mock_provider" + +[model_providers.mock_provider] +name = "Mock provider for test" +base_url = "{server_uri}/v1" +wire_api = "responses" +request_max_retries = 0 +stream_max_retries = 0 +"# + ), + ) +} diff --git a/codex-rs/app-server/tests/suite/v2/thread_archive.rs b/codex-rs/app-server/tests/suite/v2/thread_archive.rs index 88891af77d5..b8cdd426af8 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_archive.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_archive.rs @@ -8,7 +8,7 @@ use codex_app_server_protocol::ThreadArchiveResponse; use codex_app_server_protocol::ThreadStartParams; use codex_app_server_protocol::ThreadStartResponse; use codex_core::ARCHIVED_SESSIONS_SUBDIR; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; @@ -39,7 +39,7 @@ async fn thread_archive_moves_rollout_into_archived_directory() -> Result<()> { assert!(!thread.id.is_empty()); // Locate the rollout path recorded for this thread id. - let rollout_path = find_conversation_path_by_id_str(codex_home.path(), &thread.id) + let rollout_path = find_thread_path_by_id_str(codex_home.path(), &thread.id) .await? 
.expect("expected rollout path for thread id to exist"); assert!( diff --git a/codex-rs/app-server/tests/suite/v2/thread_rollback.rs b/codex-rs/app-server/tests/suite/v2/thread_rollback.rs new file mode 100644 index 00000000000..f3313c759ef --- /dev/null +++ b/codex-rs/app-server/tests/suite/v2/thread_rollback.rs @@ -0,0 +1,177 @@ +use anyhow::Result; +use app_test_support::McpProcess; +use app_test_support::create_final_assistant_message_sse_response; +use app_test_support::create_mock_chat_completions_server_unchecked; +use app_test_support::to_response; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::ThreadItem; +use codex_app_server_protocol::ThreadResumeParams; +use codex_app_server_protocol::ThreadResumeResponse; +use codex_app_server_protocol::ThreadRollbackParams; +use codex_app_server_protocol::ThreadRollbackResponse; +use codex_app_server_protocol::ThreadStartParams; +use codex_app_server_protocol::ThreadStartResponse; +use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::UserInput as V2UserInput; +use pretty_assertions::assert_eq; +use tempfile::TempDir; +use tokio::time::timeout; + +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); + +#[tokio::test] +async fn thread_rollback_drops_last_turns_and_persists_to_rollout() -> Result<()> { + // Three Codex turns hit the mock model (session start + two turn/start calls). + let responses = vec![ + create_final_assistant_message_sse_response("Done")?, + create_final_assistant_message_sse_response("Done")?, + create_final_assistant_message_sse_response("Done")?, + ]; + let server = create_mock_chat_completions_server_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + // Start a thread. 
+ let start_id = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_id)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(start_resp)?; + + // Two turns. + let first_text = "First"; + let turn1_id = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: first_text.to_string(), + }], + ..Default::default() + }) + .await?; + let _turn1_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn1_id)), + ) + .await??; + let _completed1 = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let turn2_id = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Second".to_string(), + }], + ..Default::default() + }) + .await?; + let _turn2_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn2_id)), + ) + .await??; + let _completed2 = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + // Roll back the last turn. 
+ let rollback_id = mcp + .send_thread_rollback_request(ThreadRollbackParams { + thread_id: thread.id.clone(), + num_turns: 1, + }) + .await?; + let rollback_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(rollback_id)), + ) + .await??; + let ThreadRollbackResponse { + thread: rolled_back_thread, + } = to_response::(rollback_resp)?; + + assert_eq!(rolled_back_thread.turns.len(), 1); + assert_eq!(rolled_back_thread.turns[0].items.len(), 2); + match &rolled_back_thread.turns[0].items[0] { + ThreadItem::UserMessage { content, .. } => { + assert_eq!( + content, + &vec![V2UserInput::Text { + text: first_text.to_string() + }] + ); + } + other => panic!("expected user message item, got {other:?}"), + } + + // Resume and confirm the history is pruned. + let resume_id = mcp + .send_thread_resume_request(ThreadResumeParams { + thread_id: thread.id, + ..Default::default() + }) + .await?; + let resume_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(resume_id)), + ) + .await??; + let ThreadResumeResponse { thread, .. } = to_response::(resume_resp)?; + + assert_eq!(thread.turns.len(), 1); + assert_eq!(thread.turns[0].items.len(), 2); + match &thread.turns[0].items[0] { + ThreadItem::UserMessage { content, .. 
} => { + assert_eq!( + content, + &vec![V2UserInput::Text { + text: first_text.to_string() + }] + ); + } + other => panic!("expected user message item, got {other:?}"), + } + + Ok(()) +} + +fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> { + let config_toml = codex_home.join("config.toml"); + std::fs::write( + config_toml, + format!( + r#" +model = "mock-model" +approval_policy = "never" +sandbox_mode = "read-only" + +model_provider = "mock_provider" + +[model_providers.mock_provider] +name = "Mock provider for test" +base_url = "{server_uri}/v1" +wire_api = "chat" +request_max_retries = 0 +stream_max_retries = 0 +"# + ), + ) +} diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index 1948487d14d..ab450ea832c 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -8,9 +8,10 @@ use app_test_support::create_mock_chat_completions_server_unchecked; use app_test_support::create_shell_command_sse_response; use app_test_support::format_with_current_shell_display; use app_test_support::to_response; -use codex_app_server_protocol::ApprovalDecision; +use codex_app_server_protocol::CommandExecutionApprovalDecision; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; use codex_app_server_protocol::CommandExecutionStatus; +use codex_app_server_protocol::FileChangeApprovalDecision; use codex_app_server_protocol::FileChangeOutputDeltaNotification; use codex_app_server_protocol::FileChangeRequestApprovalResponse; use codex_app_server_protocol::ItemCompletedNotification; @@ -426,7 +427,7 @@ async fn turn_start_exec_approval_decline_v2() -> Result<()> { mcp.send_response( request_id, serde_json::to_value(CommandExecutionRequestApprovalResponse { - decision: ApprovalDecision::Decline, + decision: CommandExecutionApprovalDecision::Decline, })?, ) .await?; @@ -540,6 +541,7 @@ async fn 
turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> { model: Some("mock-model".to_string()), effort: Some(ReasoningEffort::Medium), summary: Some(ReasoningSummary::Auto), + output_schema: None, }) .await?; timeout( @@ -566,6 +568,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> { model: Some("mock-model".to_string()), effort: Some(ReasoningEffort::Medium), summary: Some(ReasoningSummary::Auto), + output_schema: None, }) .await?; timeout( @@ -720,7 +723,7 @@ async fn turn_start_file_change_approval_v2() -> Result<()> { mcp.send_response( request_id, serde_json::to_value(FileChangeRequestApprovalResponse { - decision: ApprovalDecision::Accept, + decision: FileChangeApprovalDecision::Accept, })?, ) .await?; @@ -780,6 +783,190 @@ async fn turn_start_file_change_approval_v2() -> Result<()> { Ok(()) } +#[tokio::test] +async fn turn_start_file_change_approval_accept_for_session_persists_v2() -> Result<()> { + skip_if_no_network!(Ok(())); + + let tmp = TempDir::new()?; + let codex_home = tmp.path().join("codex_home"); + std::fs::create_dir(&codex_home)?; + let workspace = tmp.path().join("workspace"); + std::fs::create_dir(&workspace)?; + + let patch_1 = r#"*** Begin Patch +*** Add File: README.md ++new line +*** End Patch +"#; + let patch_2 = r#"*** Begin Patch +*** Update File: README.md +@@ +-new line ++updated line +*** End Patch +"#; + + let responses = vec![ + create_apply_patch_sse_response(patch_1, "patch-call-1")?, + create_final_assistant_message_sse_response("patch 1 applied")?, + create_apply_patch_sse_response(patch_2, "patch-call-2")?, + create_final_assistant_message_sse_response("patch 2 applied")?, + ]; + let server = create_mock_chat_completions_server(responses).await; + create_config_toml(&codex_home, &server.uri(), "untrusted")?; + + let mut mcp = McpProcess::new(&codex_home).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let start_req = mcp + 
.send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + cwd: Some(workspace.to_string_lossy().into_owned()), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(start_resp)?; + + // First turn: expect FileChangeRequestApproval, respond with AcceptForSession, and verify the file exists. + let turn_1_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "apply patch 1".into(), + }], + cwd: Some(workspace.clone()), + ..Default::default() + }) + .await?; + let turn_1_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_1_req)), + ) + .await??; + let TurnStartResponse { turn: turn_1 } = to_response::(turn_1_resp)?; + + let started_file_change_1 = timeout(DEFAULT_READ_TIMEOUT, async { + loop { + let started_notif = mcp + .read_stream_until_notification_message("item/started") + .await?; + let started: ItemStartedNotification = + serde_json::from_value(started_notif.params.clone().expect("item/started params"))?; + if let ThreadItem::FileChange { .. } = started.item { + return Ok::(started.item); + } + } + }) + .await??; + let ThreadItem::FileChange { id, status, .. 
} = started_file_change_1 else { + unreachable!("loop ensures we break on file change items"); + }; + assert_eq!(id, "patch-call-1"); + assert_eq!(status, PatchApplyStatus::InProgress); + + let server_req = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_request_message(), + ) + .await??; + let ServerRequest::FileChangeRequestApproval { request_id, params } = server_req else { + panic!("expected FileChangeRequestApproval request") + }; + assert_eq!(params.item_id, "patch-call-1"); + assert_eq!(params.thread_id, thread.id); + assert_eq!(params.turn_id, turn_1.id); + + mcp.send_response( + request_id, + serde_json::to_value(FileChangeRequestApprovalResponse { + decision: FileChangeApprovalDecision::AcceptForSession, + })?, + ) + .await?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("item/fileChange/outputDelta"), + ) + .await??; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("item/completed"), + ) + .await??; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await??; + + let readme_path = workspace.join("README.md"); + assert_eq!(std::fs::read_to_string(&readme_path)?, "new line\n"); + + // Second turn: apply a patch to the same file. Approval should be skipped due to AcceptForSession. 
+ let turn_2_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "apply patch 2".into(), + }], + cwd: Some(workspace.clone()), + ..Default::default() + }) + .await?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_2_req)), + ) + .await??; + + let started_file_change_2 = timeout(DEFAULT_READ_TIMEOUT, async { + loop { + let started_notif = mcp + .read_stream_until_notification_message("item/started") + .await?; + let started: ItemStartedNotification = + serde_json::from_value(started_notif.params.clone().expect("item/started params"))?; + if let ThreadItem::FileChange { .. } = started.item { + return Ok::(started.item); + } + } + }) + .await??; + let ThreadItem::FileChange { id, status, .. } = started_file_change_2 else { + unreachable!("loop ensures we break on file change items"); + }; + assert_eq!(id, "patch-call-2"); + assert_eq!(status, PatchApplyStatus::InProgress); + + // If the server incorrectly emits FileChangeRequestApproval, the helper below will error + // (it bails on unexpected JSONRPCMessage::Request), causing the test to fail. 
+ timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("item/fileChange/outputDelta"), + ) + .await??; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("item/completed"), + ) + .await??; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await??; + + assert_eq!(std::fs::read_to_string(readme_path)?, "updated line\n"); + + Ok(()) +} + #[tokio::test] async fn turn_start_file_change_approval_decline_v2() -> Result<()> { skip_if_no_network!(Ok(())); @@ -886,7 +1073,7 @@ async fn turn_start_file_change_approval_decline_v2() -> Result<()> { mcp.send_response( request_id, serde_json::to_value(FileChangeRequestApprovalResponse { - decision: ApprovalDecision::Decline, + decision: FileChangeApprovalDecision::Decline, })?, ) .await?; diff --git a/codex-rs/apply-patch/src/parser.rs b/codex-rs/apply-patch/src/parser.rs index 768c89ad781..8785b385199 100644 --- a/codex-rs/apply-patch/src/parser.rs +++ b/codex-rs/apply-patch/src/parser.rs @@ -227,11 +227,14 @@ fn check_start_and_end_lines_strict( first_line: Option<&&str>, last_line: Option<&&str>, ) -> Result<(), ParseError> { + let first_line = first_line.map(|line| line.trim()); + let last_line = last_line.map(|line| line.trim()); + match (first_line, last_line) { - (Some(&first), Some(&last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => { + (Some(first), Some(last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => { Ok(()) } - (Some(&first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from( + (Some(first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from( "The first line of the patch must be '*** Begin Patch'", ))), _ => Err(InvalidPatchError(String::from( @@ -444,6 +447,25 @@ fn test_parse_patch() { "The last line of the patch must be '*** End Patch'".to_string() )) ); + + assert_eq!( + parse_patch_text( + concat!( + "*** Begin 
Patch", + " ", + "\n*** Add File: foo\n+hi\n", + " ", + "*** End Patch" + ), + ParseMode::Strict + ) + .unwrap() + .hunks, + vec![AddFile { + path: PathBuf::from("foo"), + contents: "hi\n".to_string() + }] + ); assert_eq!( parse_patch_text( "*** Begin Patch\n\ diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/expected/keep.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/expected/keep.txt new file mode 100644 index 00000000000..2fa992c0b8b --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/expected/keep.txt @@ -0,0 +1 @@ +keep diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/keep.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/keep.txt new file mode 100644 index 00000000000..2fa992c0b8b --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/keep.txt @@ -0,0 +1 @@ +keep diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/obsolete.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/obsolete.txt new file mode 100644 index 00000000000..6e263abce10 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/input/obsolete.txt @@ -0,0 +1 @@ +obsolete diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/patch.txt new file mode 100644 index 00000000000..5978f738894 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_delete_file_success/patch.txt @@ -0,0 +1,3 @@ +*** Begin Patch +*** Delete File: obsolete.txt +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/expected/file.txt 
b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/expected/file.txt new file mode 100644 index 00000000000..f719efd430d --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/expected/file.txt @@ -0,0 +1 @@ +two diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/input/file.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/input/file.txt new file mode 100644 index 00000000000..5626abf0f72 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/input/file.txt @@ -0,0 +1 @@ +one diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/patch.txt new file mode 100644 index 00000000000..3d2a1dbe5ec --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/020_whitespace_padded_patch_marker_lines/patch.txt @@ -0,0 +1,6 @@ +*** Begin Patch +*** Update File: file.txt +@@ +-one ++two + *** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/expected/lines.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/expected/lines.txt new file mode 100644 index 00000000000..8129d305c8e --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/expected/lines.txt @@ -0,0 +1,2 @@ +line1 +line3 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/input/lines.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/input/lines.txt new file mode 100644 index 00000000000..83db48f84ec --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/input/lines.txt @@ -0,0 +1,3 @@ +line1 +line2 +line3 diff --git 
a/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/patch.txt new file mode 100644 index 00000000000..860c6c9a990 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/021_update_file_deletion_only/patch.txt @@ -0,0 +1,7 @@ +*** Begin Patch +*** Update File: lines.txt +@@ + line1 +-line2 + line3 +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/expected/tail.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/expected/tail.txt new file mode 100644 index 00000000000..87463f92d71 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/expected/tail.txt @@ -0,0 +1,2 @@ +first +second updated diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/input/tail.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/input/tail.txt new file mode 100644 index 00000000000..66a52ee7a1d --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/input/tail.txt @@ -0,0 +1,2 @@ +first +second diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/patch.txt new file mode 100644 index 00000000000..8b16b5bd9ee --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/022_update_file_end_of_file_marker/patch.txt @@ -0,0 +1,8 @@ +*** Begin Patch +*** Update File: tail.txt +@@ + first +-second ++second updated +*** End of File +*** End Patch diff --git a/codex-rs/apply-patch/tests/suite/scenarios.rs b/codex-rs/apply-patch/tests/suite/scenarios.rs index 0e21a7bc0a1..e53f2f1192a 100644 --- a/codex-rs/apply-patch/tests/suite/scenarios.rs +++ 
b/codex-rs/apply-patch/tests/suite/scenarios.rs @@ -1,3 +1,4 @@ +use codex_utils_cargo_bin::find_resource; use pretty_assertions::assert_eq; use std::collections::BTreeMap; use std::fs; @@ -8,7 +9,7 @@ use tempfile::tempdir; #[test] fn test_apply_patch_scenarios() -> anyhow::Result<()> { - let scenarios_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/scenarios"); + let scenarios_dir = find_resource!("tests/fixtures/scenarios")?; for scenario in fs::read_dir(scenarios_dir)? { let scenario = scenario?; let path = scenario.path(); diff --git a/codex-rs/chatgpt/Cargo.toml b/codex-rs/chatgpt/Cargo.toml index b58cd623402..70cd0aa5aa3 100644 --- a/codex-rs/chatgpt/Cargo.toml +++ b/codex-rs/chatgpt/Cargo.toml @@ -12,6 +12,7 @@ anyhow = { workspace = true } clap = { workspace = true, features = ["derive"] } codex-common = { workspace = true, features = ["cli"] } codex-core = { workspace = true } +codex-utils-cargo-bin = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tokio = { workspace = true, features = ["full"] } diff --git a/codex-rs/chatgpt/tests/suite/apply_command_e2e.rs b/codex-rs/chatgpt/tests/suite/apply_command_e2e.rs index 2aa8b809bb7..c2d570528ce 100644 --- a/codex-rs/chatgpt/tests/suite/apply_command_e2e.rs +++ b/codex-rs/chatgpt/tests/suite/apply_command_e2e.rs @@ -1,6 +1,6 @@ use codex_chatgpt::apply_command::apply_diff_from_task; use codex_chatgpt::get_task::GetTaskResponse; -use std::path::Path; +use codex_utils_cargo_bin::find_resource; use tempfile::TempDir; use tokio::process::Command; @@ -68,8 +68,8 @@ async fn create_temp_git_repo() -> anyhow::Result { } async fn mock_get_task_with_fixture() -> anyhow::Result { - let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/task_turn_fixture.json"); - let fixture_content = std::fs::read_to_string(fixture_path)?; + let fixture_path = find_resource!("tests/task_turn_fixture.json")?; + let fixture_content = 
tokio::fs::read_to_string(fixture_path).await?; let response: GetTaskResponse = serde_json::from_str(&fixture_content)?; Ok(response) } diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index ae6dabe6729..8c2ff504175 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -283,7 +283,7 @@ struct StdioToUdsCommand { fn format_exit_messages(exit_info: AppExitInfo, color_enabled: bool) -> Vec { let AppExitInfo { token_usage, - conversation_id, + thread_id: conversation_id, .. } = exit_info; @@ -480,7 +480,12 @@ async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<() } Some(Subcommand::AppServer(app_server_cli)) => match app_server_cli.subcommand { None => { - codex_app_server::run_main(codex_linux_sandbox_exe, root_config_overrides).await?; + codex_app_server::run_main( + codex_linux_sandbox_exe, + root_config_overrides, + codex_core::config_loader::LoaderOverrides::default(), + ) + .await?; } Some(AppServerSubcommand::GenerateTs(gen_cli)) => { codex_app_server_protocol::generate_ts( @@ -785,7 +790,7 @@ mod tests { use super::*; use assert_matches::assert_matches; use codex_core::protocol::TokenUsage; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; use pretty_assertions::assert_eq; fn finalize_from_args(args: &[&str]) -> TuiCli { @@ -825,9 +830,7 @@ mod tests { }; AppExitInfo { token_usage, - conversation_id: conversation - .map(ConversationId::from_string) - .map(Result::unwrap), + thread_id: conversation.map(ThreadId::from_string).map(Result::unwrap), update_action: None, } } @@ -836,7 +839,7 @@ mod tests { fn format_exit_messages_skips_zero_usage() { let exit_info = AppExitInfo { token_usage: TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: None, }; let lines = format_exit_messages(exit_info, false); diff --git a/codex-rs/cli/tests/execpolicy.rs b/codex-rs/cli/tests/execpolicy.rs index 7d8a2b1c45d..30b5999c053 100644 --- a/codex-rs/cli/tests/execpolicy.rs +++ 
b/codex-rs/cli/tests/execpolicy.rs @@ -59,3 +59,61 @@ prefix_rule( Ok(()) } + +#[test] +fn execpolicy_check_includes_justification_when_present() -> Result<(), Box> +{ + let codex_home = TempDir::new()?; + let policy_path = codex_home.path().join("rules").join("policy.rules"); + fs::create_dir_all( + policy_path + .parent() + .expect("policy path should have a parent"), + )?; + fs::write( + &policy_path, + r#" +prefix_rule( + pattern = ["git", "push"], + decision = "forbidden", + justification = "pushing is blocked in this repo", +) +"#, + )?; + + let output = Command::new(codex_utils_cargo_bin::cargo_bin("codex")?) + .env("CODEX_HOME", codex_home.path()) + .args([ + "execpolicy", + "check", + "--rules", + policy_path + .to_str() + .expect("policy path should be valid UTF-8"), + "git", + "push", + "origin", + "main", + ]) + .output()?; + + assert!(output.status.success()); + let result: serde_json::Value = serde_json::from_slice(&output.stdout)?; + assert_eq!( + result, + json!({ + "decision": "forbidden", + "matchedRules": [ + { + "prefixRuleMatch": { + "matchedPrefix": ["git", "push"], + "decision": "forbidden", + "justification": "pushing is blocked in this repo" + } + } + ] + }) + ); + + Ok(()) +} diff --git a/codex-rs/codex-api/src/endpoint/chat.rs b/codex-rs/codex-api/src/endpoint/chat.rs index b7fa0572f0e..f747c57411f 100644 --- a/codex-rs/codex-api/src/endpoint/chat.rs +++ b/codex-rs/codex-api/src/endpoint/chat.rs @@ -10,6 +10,7 @@ use crate::provider::WireApi; use crate::sse::chat::spawn_chat_stream; use crate::telemetry::SseTelemetry; use codex_client::HttpTransport; +use codex_client::RequestCompression; use codex_client::RequestTelemetry; use codex_protocol::models::ContentItem; use codex_protocol::models::ReasoningItemContent; @@ -80,7 +81,13 @@ impl ChatClient { extra_headers: HeaderMap, ) -> Result { self.streaming - .stream(self.path(), body, extra_headers, spawn_chat_stream) + .stream( + self.path(), + body, + extra_headers, + 
RequestCompression::None, + spawn_chat_stream, + ) .await } } diff --git a/codex-rs/codex-api/src/endpoint/models.rs b/codex-rs/codex-api/src/endpoint/models.rs index 74e109bf2a5..9f6083dc89c 100644 --- a/codex-rs/codex-api/src/endpoint/models.rs +++ b/codex-rs/codex-api/src/endpoint/models.rs @@ -215,14 +215,14 @@ mod tests { "supported_in_api": true, "priority": 1, "upgrade": null, - "base_instructions": null, + "base_instructions": "base instructions", "supports_reasoning_summaries": false, "support_verbosity": false, "default_verbosity": null, "apply_patch_tool_type": null, "truncation_policy": {"mode": "bytes", "limit": 10_000}, "supports_parallel_tool_calls": false, - "context_window": null, + "context_window": 272_000, "experimental_supported_tools": [], })) .unwrap(), diff --git a/codex-rs/codex-api/src/endpoint/responses.rs b/codex-rs/codex-api/src/endpoint/responses.rs index 476e8b8f138..57c7b0da033 100644 --- a/codex-rs/codex-api/src/endpoint/responses.rs +++ b/codex-rs/codex-api/src/endpoint/responses.rs @@ -9,9 +9,11 @@ use crate::provider::Provider; use crate::provider::WireApi; use crate::requests::ResponsesRequest; use crate::requests::ResponsesRequestBuilder; +use crate::requests::responses::Compression; use crate::sse::spawn_response_stream; use crate::telemetry::SseTelemetry; use codex_client::HttpTransport; +use codex_client::RequestCompression; use codex_client::RequestTelemetry; use codex_protocol::protocol::SessionSource; use http::HeaderMap; @@ -33,6 +35,7 @@ pub struct ResponsesOptions { pub conversation_id: Option, pub session_source: Option, pub extra_headers: HeaderMap, + pub compression: Compression, } impl ResponsesClient { @@ -56,7 +59,8 @@ impl ResponsesClient { &self, request: ResponsesRequest, ) -> Result { - self.stream(request.body, request.headers).await + self.stream(request.body, request.headers, request.compression) + .await } #[instrument(level = "trace", skip_all, err)] @@ -75,6 +79,7 @@ impl ResponsesClient { 
conversation_id, session_source, extra_headers, + compression, } = options; let request = ResponsesRequestBuilder::new(model, &prompt.instructions, &prompt.input) @@ -88,6 +93,7 @@ impl ResponsesClient { .session_source(session_source) .store_override(store_override) .extra_headers(extra_headers) + .compression(compression) .build(self.streaming.provider())?; self.stream_request(request).await @@ -104,9 +110,21 @@ impl ResponsesClient { &self, body: Value, extra_headers: HeaderMap, + compression: Compression, ) -> Result { + let compression = match compression { + Compression::None => RequestCompression::None, + Compression::Zstd => RequestCompression::Zstd, + }; + self.streaming - .stream(self.path(), body, extra_headers, spawn_response_stream) + .stream( + self.path(), + body, + extra_headers, + compression, + spawn_response_stream, + ) .await } } diff --git a/codex-rs/codex-api/src/endpoint/streaming.rs b/codex-rs/codex-api/src/endpoint/streaming.rs index 156d4084bc8..de180845e83 100644 --- a/codex-rs/codex-api/src/endpoint/streaming.rs +++ b/codex-rs/codex-api/src/endpoint/streaming.rs @@ -6,6 +6,7 @@ use crate::provider::Provider; use crate::telemetry::SseTelemetry; use crate::telemetry::run_with_request_telemetry; use codex_client::HttpTransport; +use codex_client::RequestCompression; use codex_client::RequestTelemetry; use codex_client::StreamResponse; use http::HeaderMap; @@ -52,6 +53,7 @@ impl StreamingClient { path: &str, body: Value, extra_headers: HeaderMap, + compression: RequestCompression, spawner: fn(StreamResponse, Duration, Option>) -> ResponseStream, ) -> Result { let builder = || { @@ -62,6 +64,7 @@ impl StreamingClient { http::HeaderValue::from_static("text/event-stream"), ); req.body = Some(body.clone()); + req.compression = compression; add_auth_headers(&self.auth, req) }; diff --git a/codex-rs/codex-api/src/provider.rs b/codex-rs/codex-api/src/provider.rs index 8bd5fc9093c..846a25bf5e3 100644 --- a/codex-rs/codex-api/src/provider.rs +++ 
b/codex-rs/codex-api/src/provider.rs @@ -1,4 +1,5 @@ use codex_client::Request; +use codex_client::RequestCompression; use codex_client::RetryOn; use codex_client::RetryPolicy; use http::Method; @@ -87,6 +88,7 @@ impl Provider { url: self.url_for_path(path), headers: self.headers.clone(), body: None, + compression: RequestCompression::None, timeout: None, } } diff --git a/codex-rs/codex-api/src/requests/responses.rs b/codex-rs/codex-api/src/requests/responses.rs index 543b79bbe9d..a18a147aba9 100644 --- a/codex-rs/codex-api/src/requests/responses.rs +++ b/codex-rs/codex-api/src/requests/responses.rs @@ -11,10 +11,18 @@ use codex_protocol::protocol::SessionSource; use http::HeaderMap; use serde_json::Value; +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum Compression { + #[default] + None, + Zstd, +} + /// Assembled request body plus headers for a Responses stream request. pub struct ResponsesRequest { pub body: Value, pub headers: HeaderMap, + pub compression: Compression, } #[derive(Default)] @@ -32,6 +40,7 @@ pub struct ResponsesRequestBuilder<'a> { session_source: Option, store_override: Option, headers: HeaderMap, + compression: Compression, } impl<'a> ResponsesRequestBuilder<'a> { @@ -94,6 +103,11 @@ impl<'a> ResponsesRequestBuilder<'a> { self } + pub fn compression(mut self, compression: Compression) -> Self { + self.compression = compression; + self + } + pub fn build(self, provider: &Provider) -> Result { let model = self .model @@ -138,7 +152,11 @@ impl<'a> ResponsesRequestBuilder<'a> { insert_header(&mut headers, "x-openai-subagent", &subagent); } - Ok(ResponsesRequest { body, headers }) + Ok(ResponsesRequest { + body, + headers, + compression: self.compression, + }) } } diff --git a/codex-rs/codex-api/src/sse/chat.rs b/codex-rs/codex-api/src/sse/chat.rs index 21adfa571a2..dec35890b78 100644 --- a/codex-rs/codex-api/src/sse/chat.rs +++ b/codex-rs/codex-api/src/sse/chat.rs @@ -30,6 +30,21 @@ pub(crate) fn spawn_chat_stream( ResponseStream { 
rx_event } } +/// Processes Server-Sent Events from the legacy Chat Completions streaming API. +/// +/// The upstream protocol terminates a streaming response with a final sentinel event +/// (`data: [DONE]`). Historically, some of our test stubs have emitted `data: DONE` +/// (without brackets) instead. +/// +/// `eventsource_stream` delivers these sentinels as regular events rather than signaling +/// end-of-stream. If we try to parse them as JSON, we log and skip them, then keep +/// polling for more events. +/// +/// On servers that keep the HTTP connection open after emitting the sentinel (notably +/// wiremock on Windows), skipping the sentinel means we never emit `ResponseEvent::Completed`. +/// Higher-level workflows/tests that wait for completion before issuing subsequent model +/// calls will then stall, which shows up as "expected N requests, got 1" verification +/// failures in the mock server. pub async fn process_chat_sse( stream: S, tx_event: mpsc::Sender>, @@ -57,6 +72,31 @@ pub async fn process_chat_sse( let mut reasoning_item: Option = None; let mut completed_sent = false; + async fn flush_and_complete( + tx_event: &mpsc::Sender>, + reasoning_item: &mut Option, + assistant_item: &mut Option, + ) { + if let Some(reasoning) = reasoning_item.take() { + let _ = tx_event + .send(Ok(ResponseEvent::OutputItemDone(reasoning))) + .await; + } + + if let Some(assistant) = assistant_item.take() { + let _ = tx_event + .send(Ok(ResponseEvent::OutputItemDone(assistant))) + .await; + } + + let _ = tx_event + .send(Ok(ResponseEvent::Completed { + response_id: String::new(), + token_usage: None, + })) + .await; + } + loop { let start = Instant::now(); let response = timeout(idle_timeout, stream.next()).await; @@ -70,24 +110,8 @@ pub async fn process_chat_sse( return; } Ok(None) => { - if let Some(reasoning) = reasoning_item { - let _ = tx_event - .send(Ok(ResponseEvent::OutputItemDone(reasoning))) - .await; - } - - if let Some(assistant) = assistant_item { - let _ 
= tx_event - .send(Ok(ResponseEvent::OutputItemDone(assistant))) - .await; - } if !completed_sent { - let _ = tx_event - .send(Ok(ResponseEvent::Completed { - response_id: String::new(), - token_usage: None, - })) - .await; + flush_and_complete(&tx_event, &mut reasoning_item, &mut assistant_item).await; } return; } @@ -101,16 +125,25 @@ pub async fn process_chat_sse( trace!("SSE event: {}", sse.data); - if sse.data.trim().is_empty() { + let data = sse.data.trim(); + + if data.is_empty() { continue; } - let value: serde_json::Value = match serde_json::from_str(&sse.data) { + if data == "[DONE]" || data == "DONE" { + if !completed_sent { + flush_and_complete(&tx_event, &mut reasoning_item, &mut assistant_item).await; + } + return; + } + + let value: serde_json::Value = match serde_json::from_str(data) { Ok(val) => val, Err(err) => { debug!( "Failed to parse ChatCompletions SSE event: {err}, data: {}", - &sse.data + data ); continue; } @@ -362,6 +395,16 @@ mod tests { body } + /// Regression test: the stream should complete when we see a `[DONE]` sentinel. + /// + /// This is important for tests/mocks that don't immediately close the underlying + /// connection after emitting the sentinel. + #[tokio::test] + async fn completes_on_done_sentinel_without_json() { + let events = collect_events("event: message\ndata: [DONE]\n\n").await; + assert_matches!(&events[..], [ResponseEvent::Completed { .. 
}]); + } + async fn collect_events(body: &str) -> Vec { let reader = ReaderStream::new(std::io::Cursor::new(body.to_string())) .map_err(|err| codex_client::TransportError::Network(err.to_string())); diff --git a/codex-rs/codex-api/tests/clients.rs b/codex-rs/codex-api/tests/clients.rs index 3dafaf74fae..70af9fe829c 100644 --- a/codex-rs/codex-api/tests/clients.rs +++ b/codex-rs/codex-api/tests/clients.rs @@ -11,6 +11,7 @@ use codex_api::Provider; use codex_api::ResponsesClient; use codex_api::ResponsesOptions; use codex_api::WireApi; +use codex_api::requests::responses::Compression; use codex_client::HttpTransport; use codex_client::Request; use codex_client::Response; @@ -229,7 +230,9 @@ async fn responses_client_uses_responses_path_for_responses_wire() -> Result<()> let client = ResponsesClient::new(transport, provider("openai", WireApi::Responses), NoAuth); let body = serde_json::json!({ "echo": true }); - let _stream = client.stream(body, HeaderMap::new()).await?; + let _stream = client + .stream(body, HeaderMap::new(), Compression::None) + .await?; let requests = state.take_stream_requests(); assert_path_ends_with(&requests, "/responses"); @@ -243,7 +246,9 @@ async fn responses_client_uses_chat_path_for_chat_wire() -> Result<()> { let client = ResponsesClient::new(transport, provider("openai", WireApi::Chat), NoAuth); let body = serde_json::json!({ "echo": true }); - let _stream = client.stream(body, HeaderMap::new()).await?; + let _stream = client + .stream(body, HeaderMap::new(), Compression::None) + .await?; let requests = state.take_stream_requests(); assert_path_ends_with(&requests, "/chat/completions"); @@ -258,7 +263,9 @@ async fn streaming_client_adds_auth_headers() -> Result<()> { let client = ResponsesClient::new(transport, provider("openai", WireApi::Responses), auth); let body = serde_json::json!({ "model": "gpt-test" }); - let _stream = client.stream(body, HeaderMap::new()).await?; + let _stream = client + .stream(body, HeaderMap::new(), 
Compression::None) + .await?; let requests = state.take_stream_requests(); assert_eq!(requests.len(), 1); diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 8ed6ce0d6d5..46e1773098b 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -56,7 +56,7 @@ async fn models_client_hits_models_endpoint() { slug: "gpt-test".to_string(), display_name: "gpt-test".to_string(), description: Some("desc".to_string()), - default_reasoning_level: ReasoningEffort::Medium, + default_reasoning_level: Some(ReasoningEffort::Medium), supported_reasoning_levels: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Low, @@ -76,14 +76,16 @@ async fn models_client_hits_models_endpoint() { supported_in_api: true, priority: 1, upgrade: None, - base_instructions: None, + base_instructions: "base instructions".to_string(), supports_reasoning_summaries: false, support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, truncation_policy: TruncationPolicyConfig::bytes(10_000), supports_parallel_tool_calls: false, - context_window: None, + context_window: Some(272_000), + auto_compact_token_limit: None, + effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), }], }; diff --git a/codex-rs/codex-api/tests/sse_end_to_end.rs b/codex-rs/codex-api/tests/sse_end_to_end.rs index b91cf3a5d8e..f324cc7480b 100644 --- a/codex-rs/codex-api/tests/sse_end_to_end.rs +++ b/codex-rs/codex-api/tests/sse_end_to_end.rs @@ -9,6 +9,7 @@ use codex_api::Provider; use codex_api::ResponseEvent; use codex_api::ResponsesClient; use codex_api::WireApi; +use codex_api::requests::responses::Compression; use codex_client::HttpTransport; use codex_client::Request; use codex_client::Response; @@ -124,7 +125,11 @@ async fn responses_stream_parses_items_and_completed_end_to_end() -> Result<()> let client = ResponsesClient::new(transport, provider("openai", 
WireApi::Responses), NoAuth); let mut stream = client - .stream(serde_json::json!({"echo": true}), HeaderMap::new()) + .stream( + serde_json::json!({"echo": true}), + HeaderMap::new(), + Compression::None, + ) .await?; let mut events = Vec::new(); @@ -189,7 +194,11 @@ async fn responses_stream_aggregates_output_text_deltas() -> Result<()> { let client = ResponsesClient::new(transport, provider("openai", WireApi::Responses), NoAuth); let stream = client - .stream(serde_json::json!({"echo": true}), HeaderMap::new()) + .stream( + serde_json::json!({"echo": true}), + HeaderMap::new(), + Compression::None, + ) .await?; let mut stream = stream.aggregate(); diff --git a/codex-rs/codex-client/Cargo.toml b/codex-rs/codex-client/Cargo.toml index 2eeb4569372..233bea40885 100644 --- a/codex-rs/codex-client/Cargo.toml +++ b/codex-rs/codex-client/Cargo.toml @@ -19,6 +19,7 @@ thiserror = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "time", "sync"] } tracing = { workspace = true } tracing-opentelemetry = { workspace = true } +zstd = { workspace = true } [lints] workspace = true diff --git a/codex-rs/codex-client/src/default_client.rs b/codex-rs/codex-client/src/default_client.rs index efb4d5aec41..781ded3614c 100644 --- a/codex-rs/codex-client/src/default_client.rs +++ b/codex-rs/codex-client/src/default_client.rs @@ -104,6 +104,13 @@ impl CodexRequestBuilder { self.map(|builder| builder.json(value)) } + pub fn body(self, body: B) -> Self + where + B: Into, + { + self.map(|builder| builder.body(body)) + } + pub async fn send(self) -> Result { let headers = trace_headers(); diff --git a/codex-rs/codex-client/src/lib.rs b/codex-rs/codex-client/src/lib.rs index 66d1083c07d..089d777c3a2 100644 --- a/codex-rs/codex-client/src/lib.rs +++ b/codex-rs/codex-client/src/lib.rs @@ -11,6 +11,7 @@ pub use crate::default_client::CodexRequestBuilder; pub use crate::error::StreamError; pub use crate::error::TransportError; pub use crate::request::Request; +pub use 
crate::request::RequestCompression; pub use crate::request::Response; pub use crate::retry::RetryOn; pub use crate::retry::RetryPolicy; diff --git a/codex-rs/codex-client/src/request.rs b/codex-rs/codex-client/src/request.rs index f3d205de99c..c2c9cf2b338 100644 --- a/codex-rs/codex-client/src/request.rs +++ b/codex-rs/codex-client/src/request.rs @@ -5,12 +5,20 @@ use serde::Serialize; use serde_json::Value; use std::time::Duration; +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum RequestCompression { + #[default] + None, + Zstd, +} + #[derive(Debug, Clone)] pub struct Request { pub method: Method, pub url: String, pub headers: HeaderMap, pub body: Option, + pub compression: RequestCompression, pub timeout: Option, } @@ -21,6 +29,7 @@ impl Request { url, headers: HeaderMap::new(), body: None, + compression: RequestCompression::None, timeout: None, } } @@ -29,6 +38,11 @@ impl Request { self.body = serde_json::to_value(body).ok(); self } + + pub fn with_compression(mut self, compression: RequestCompression) -> Self { + self.compression = compression; + self + } } #[derive(Debug, Clone)] diff --git a/codex-rs/codex-client/src/transport.rs b/codex-rs/codex-client/src/transport.rs index abe6e29ee55..50e9f8fab77 100644 --- a/codex-rs/codex-client/src/transport.rs +++ b/codex-rs/codex-client/src/transport.rs @@ -2,6 +2,7 @@ use crate::default_client::CodexHttpClient; use crate::default_client::CodexRequestBuilder; use crate::error::TransportError; use crate::request::Request; +use crate::request::RequestCompression; use crate::request::Response; use async_trait::async_trait; use bytes::Bytes; @@ -41,18 +42,70 @@ impl ReqwestTransport { } fn build(&self, req: Request) -> Result { - let mut builder = self - .client - .request( - Method::from_bytes(req.method.as_str().as_bytes()).unwrap_or(Method::GET), - &req.url, - ) - .headers(req.headers); - if let Some(timeout) = req.timeout { + let Request { + method, + url, + mut headers, + body, + compression, + 
timeout, + } = req; + + let mut builder = self.client.request( + Method::from_bytes(method.as_str().as_bytes()).unwrap_or(Method::GET), + &url, + ); + + if let Some(timeout) = timeout { builder = builder.timeout(timeout); } - if let Some(body) = req.body { - builder = builder.json(&body); + + if let Some(body) = body { + if compression != RequestCompression::None { + if headers.contains_key(http::header::CONTENT_ENCODING) { + return Err(TransportError::Build( + "request compression was requested but content-encoding is already set" + .to_string(), + )); + } + + let json = serde_json::to_vec(&body) + .map_err(|err| TransportError::Build(err.to_string()))?; + let pre_compression_bytes = json.len(); + let compression_start = std::time::Instant::now(); + let (compressed, content_encoding) = match compression { + RequestCompression::None => unreachable!("guarded by compression != None"), + RequestCompression::Zstd => ( + zstd::stream::encode_all(std::io::Cursor::new(json), 3) + .map_err(|err| TransportError::Build(err.to_string()))?, + http::HeaderValue::from_static("zstd"), + ), + }; + let post_compression_bytes = compressed.len(); + let compression_duration = compression_start.elapsed(); + + // Ensure the server knows to unpack the request body. 
+ headers.insert(http::header::CONTENT_ENCODING, content_encoding); + if !headers.contains_key(http::header::CONTENT_TYPE) { + headers.insert( + http::header::CONTENT_TYPE, + http::HeaderValue::from_static("application/json"), + ); + } + + tracing::info!( + pre_compression_bytes, + post_compression_bytes, + compression_duration_ms = compression_duration.as_millis(), + "Compressed request body with zstd" + ); + + builder = builder.headers(headers).body(compressed); + } else { + builder = builder.headers(headers).json(&body); + } + } else { + builder = builder.headers(headers); } Ok(builder) } diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 51fab19decf..faa905f3e86 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -122,7 +122,8 @@ keyring = { workspace = true, features = ["sync-secret-service"] } assert_cmd = { workspace = true } assert_matches = { workspace = true } codex-arg0 = { workspace = true } -codex-core = { path = ".", features = ["deterministic_process_ids"] } +codex-core = { path = ".", default-features = false, features = ["deterministic_process_ids"] } +codex-otel = { workspace = true, features = ["disable-default-metrics-exporter"] } codex-utils-cargo-bin = { workspace = true } core_test_support = { workspace = true } ctor = { workspace = true } @@ -137,6 +138,7 @@ tracing-subscriber = { workspace = true } tracing-test = { workspace = true, features = ["no-env-filter"] } walkdir = { workspace = true } wiremock = { workspace = true } +zstd = { workspace = true } [package.metadata.cargo-shear] ignored = ["openssl-sys"] diff --git a/codex-rs/core/prompt_with_apply_patch_instructions.md b/codex-rs/core/prompt_with_apply_patch_instructions.md new file mode 100644 index 00000000000..af5537c924d --- /dev/null +++ b/codex-rs/core/prompt_with_apply_patch_instructions.md @@ -0,0 +1,386 @@ +You are a coding agent running in the Codex CLI, a terminal-based coding assistant. 
Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful. + +Your capabilities: + +- Receive user prompts and other context provided by the harness, such as files in the workspace. +- Communicate with the user by streaming thinking & responses, and by making & updating plans. +- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section. + +Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI). + +# How you work + +## Personality + +Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work. + +# AGENTS.md spec +- Repos often contain AGENTS.md files. These files can appear anywhere within the repository. +- These files are a way for humans to give you (the agent) instructions or tips for working within the container. +- Some examples might be: coding conventions, info about how code is organized, or instructions for how to run or test code. +- Instructions in AGENTS.md files: + - The scope of an AGENTS.md file is the entire directory tree rooted at the folder that contains it. + - For every file you touch in the final patch, you must obey instructions in any AGENTS.md file whose scope includes that file. + - Instructions about code style, structure, naming, etc. apply only to code within the AGENTS.md file's scope, unless the file states otherwise. 
+  - More-deeply-nested AGENTS.md files take precedence in the case of conflicting instructions. +  - Direct system/developer/user instructions (as part of a prompt) take precedence over AGENTS.md instructions. +- The contents of the AGENTS.md file at the root of the repo and any directories from the CWD up to the root are included with the developer message and don't need to be re-read. When working in a subdirectory of CWD, or a directory outside the CWD, check for any AGENTS.md files that may be applicable. + +## Responsiveness + +### Preamble messages + +Before making tool calls, send a brief preamble to the user explaining what you’re about to do. When sending preamble messages, follow these principles and examples: + +- **Logically group related actions**: if you’re about to run several related commands, describe them together in one preamble rather than sending a separate note for each. +- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words for quick updates). +- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions. +- **Keep your tone light, friendly and curious**: adding small touches of personality makes preambles feel collaborative and engaging. +- **Exception**: Avoid adding a preamble for every trivial read (e.g., `cat` a single file) unless it’s part of a larger grouped action. + +**Examples:** + +- “I’ve explored the repo; now checking the API route definitions.” +- “Next, I’ll patch the config and update the related tests.” +- “I’m about to scaffold the CLI commands and helper functions.” +- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.” +- “Config’s looking tidy. Next up is patching helpers to keep things in sync.” +- “Finished poking at the DB gateway. 
I will now chase down error handling.” +- “Alright, build pipeline order is interesting. Checking how it reports failures.” +- “Spotted a clever caching util; now hunting where it gets used.” + +## Planning + +You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go. + +Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately. + +Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step. + +Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so. + +Use a plan when: + +- The task is non-trivial and will require multiple actions over a long time horizon. +- There are logical phases or dependencies where sequencing matters. +- The work has ambiguity that benefits from outlining high-level goals. 
+- You want intermediate checkpoints for feedback and validation. +- When the user asked you to do more than one thing in a single prompt +- The user has asked you to use the plan tool (aka "TODOs") +- You generate additional steps while working, and plan to do them before yielding to the user + +### Examples + +**High-quality plans** + +Example 1: + +1. Add CLI entry with file args +2. Parse Markdown via CommonMark library +3. Apply semantic HTML template +4. Handle code blocks, images, links +5. Add error handling for invalid files + +Example 2: + +1. Define CSS variables for colors +2. Add toggle with localStorage state +3. Refactor components to use variables +4. Verify all views for readability +5. Add smooth theme-change transition + +Example 3: + +1. Set up Node.js + WebSocket server +2. Add join/leave broadcast events +3. Implement messaging with timestamps +4. Add usernames + mention highlighting +5. Persist messages in lightweight DB +6. Add typing indicators + unread count + +**Low-quality plans** + +Example 1: + +1. Create CLI tool +2. Add Markdown parser +3. Convert to HTML + +Example 2: + +1. Add dark mode toggle +2. Save preference +3. Make styles look good + +Example 3: + +1. Create single-file HTML game +2. Run quick sanity check +3. Summarize usage instructions + +If you need to write a plan, only write high quality plans, not low quality ones. + +## Task execution + +You are a coding agent. Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer. + +You MUST adhere to the following criteria when solving queries: + +- Working on the repo(s) in the current environment is allowed, even if they are proprietary. +- Analyzing code for vulnerabilities is allowed. 
+- Showing user code and tool call details is allowed. +- Use the `apply_patch` tool to edit files (NEVER try `applypatch` or `apply-patch`, only `apply_patch`): {"command":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n- pass\\n+ return 123\\n*** End Patch"]} + +If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines: + +- Fix the problem at the root cause rather than applying surface-level patches, when possible. +- Avoid unneeded complexity in your solution. +- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.) +- Update documentation as necessary. +- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task. +- Use `git log` and `git blame` to search the history of the codebase if additional context is required. +- NEVER add copyright or license headers unless specifically requested. +- Do not waste tokens by re-reading files after calling `apply_patch` on them. The tool call will fail if it didn't work. The same goes for making folders, deleting folders, etc. +- Do not `git commit` your changes or create new git branches unless explicitly requested. +- Do not add inline comments within code unless explicitly requested. +- Do not use one-letter variable names unless explicitly requested. +- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. + +## Sandbox and approvals + +The Codex CLI harness supports several different sandboxing, and approval configurations that the user can choose from. 
+ +Filesystem sandboxing prevents you from editing files without user approval. The options are: + +- **read-only**: You can only read files. +- **workspace-write**: You can read files. You can write to files in your workspace folder, but not outside it. +- **danger-full-access**: No filesystem sandboxing. + +Network sandboxing prevents you from accessing the network without approval. Options are + +- **restricted** +- **enabled** + +Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task. Approval options are + +- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. +- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. +- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) +- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. 
Just remove them before yielding. + +When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: + +- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp) +- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. +- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. +- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for +- (For all of these, you should weigh alternative paths that do not require approval.) + +Note that when sandboxing is set to read-only, you'll need to request approval for any command that isn't a read. + +You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing ON, and approval on-failure. + +## Validating your work + +If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete. + +When testing, your philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests. + +Similarly, once you're confident in correctness, you can suggest or use formatting commands to ensure that your code is well formatted. 
If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one. + +For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.) + +Be mindful of whether to run validation commands proactively. In the absence of behavioral guidance: + +- When running in non-interactive approval modes like **never** or **on-failure**, proactively run tests, lint and do whatever you need to ensure you've completed the task. +- When working in interactive approval modes like **untrusted**, or **on-request**, hold off on running tests or lint commands until the user is ready for you to finalize your output, because these commands take time to run and slow down iteration. Instead suggest what you want to do next, and let the user confirm first. +- When working on test-related tasks, such as adding tests, fixing tests, or reproducing a bug to verify behavior, you may proactively run tests regardless of approval mode. Use your judgement to decide whether this is a test-related task. + +## Ambition vs. precision + +For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation. + +If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature. 
+ +You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified. + +## Sharing progress updates + +For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explores, subtasks complete), and where you're going next. + +Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. Don't start editing or writing large files before informing the user what you are doing and why. + +The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along. + +## Presenting your work and final message + +Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user’s style. 
If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges. + +You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation. + +The user is working on the same computer as you, and has access to your work. As such there's no need to show the full contents of large files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using `apply_patch`, there's no need to tell users to "save the file" or "copy the code into a file"—just reference the file path. + +If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do (even with approval) but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly. + +Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding. + +### Final answer structure and style guidelines + +You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value. + +**Section Headers** + +- Use only when they improve clarity — they are not mandatory for every answer. 
+- Choose descriptive names that fit the content +- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**` +- Leave no blank line before the first bullet under a header. +- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer. + +**Bullets** + +- Use `-` followed by a space for every bullet. +- Merge related points when possible; avoid a bullet for every trivial detail. +- Keep bullets to one line unless breaking for clarity is unavoidable. +- Group into short lists (4–6 bullets) ordered by importance. +- Use consistent keyword phrasing and formatting across sections. + +**Monospace** + +- Wrap all commands, file paths, env vars, and code identifiers in backticks (`` `...` ``). +- Apply to inline examples and to bullet keywords if the keyword itself is a literal file/command. +- Never mix monospace and bold markers; choose one based on whether it’s a keyword (`**`) or inline code/path (`` ` ``). + +**File References** +When referencing files in your response, make sure to include the relevant start line and always follow the below rules: + * Use inline code to make file paths clickable. + * Each reference should have a stand alone path. Even if it's the same file. + * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix. + * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1). + * Do not use URIs like file://, vscode://, or https://. + * Do not provide range of lines + * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5 + +**Structure** + +- Place related bullets together; don’t mix unrelated concepts in the same section. +- Order sections from general → specific → supporting info. +- For subsections (e.g., “Binaries” under “Rust Workspace”), introduce with a bolded keyword bullet, then list items under it. 
+- Match structure to complexity:
+  - Multi-part or detailed results → use clear headers and grouped bullets.
+  - Simple results → minimal headers, possibly just a short list or paragraph.
+
+**Tone**
+
+- Keep the voice collaborative and natural, like a coding partner handing off work.
+- Be concise and factual — no filler or conversational commentary and avoid unnecessary repetition.
+- Use present tense and active voice (e.g., “Runs tests” not “This will run tests”).
+- Keep descriptions self-contained; don’t refer to “above” or “below”.
+- Use parallel structure in lists for consistency.
+
+**Don’t**
+
+- Don’t use literal words “bold” or “monospace” in the content.
+- Don’t nest bullets or create deep hierarchies.
+- Don’t output ANSI escape codes directly — the CLI renderer applies them.
+- Don’t cram unrelated keywords into a single bullet; split for clarity.
+- Don’t let keyword lists run long — wrap or reformat for scannability.
+
+Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable.
+
+For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting.
+ +# Tool Guidelines + +## Shell commands + +When using the shell, you must adhere to the following guidelines: + +- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.) +- Do not use python scripts to attempt to output larger chunks of a file. + +## `update_plan` + +A tool named `update_plan` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task. + +To create a new plan, call `update_plan` with a short list of 1‑sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`). + +When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call. + +If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`. + +## `apply_patch` + +Use the `apply_patch` shell command to edit files. +Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope: + +*** Begin Patch +[ one or more file sections ] +*** End Patch + +Within that envelope, you get a sequence of file operations. +You MUST include a header to specify the action you are taking. +Each operation starts with one of three headers: + +*** Add File: - create a new file. Every following line is a + line (the initial contents). +*** Delete File: - remove an existing file. Nothing follows. +*** Update File: - patch an existing file in place (optionally with a rename). + +May be immediately followed by *** Move to: if you want to rename the file. +Then one or more “hunks”, each introduced by @@ (optionally followed by a hunk header). 
+Within a hunk each line starts with: + +For instructions on [context_before] and [context_after]: +- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines. +- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have: +@@ class BaseClass +[3 lines of pre-context] +- [old_code] ++ [new_code] +[3 lines of post-context] + +- If a code block is repeated so many times in a class or function such that even a single `@@` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple `@@` statements to jump to the right context. For instance: + +@@ class BaseClass +@@ def method(): +[3 lines of pre-context] +- [old_code] ++ [new_code] +[3 lines of post-context] + +The full grammar definition is below: +Patch := Begin { FileOp } End +Begin := "*** Begin Patch" NEWLINE +End := "*** End Patch" NEWLINE +FileOp := AddFile | DeleteFile | UpdateFile +AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE } +DeleteFile := "*** Delete File: " path NEWLINE +UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk } +MoveTo := "*** Move to: " newPath NEWLINE +Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ] +HunkLine := (" " | "-" | "+") text NEWLINE + +A full patch can combine several operations: + +*** Begin Patch +*** Add File: hello.txt ++Hello world +*** Update File: src/app.py +*** Move to: src/main.py +@@ def greet(): +-print("Hi") ++print("Hello, world!") +*** Delete File: obsolete.txt +*** End Patch + +It is important to remember: + +- You must include a header with your intended action (Add/Delete/Update) +- You must prefix new lines with `+` even when 
creating a new file +- File references can only be relative, NEVER ABSOLUTE. + +You can invoke apply_patch like: + +``` +shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]} +``` diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs new file mode 100644 index 00000000000..201bb4e0ff6 --- /dev/null +++ b/codex-rs/core/src/agent/control.rs @@ -0,0 +1,188 @@ +use crate::CodexThread; +use crate::agent::AgentStatus; +use crate::error::CodexErr; +use crate::error::Result as CodexResult; +use crate::thread_manager::ThreadManagerState; +use codex_protocol::ThreadId; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::user_input::UserInput; +use std::sync::Arc; +use std::sync::Weak; + +/// Control-plane handle for multi-agent operations. +/// `AgentControl` is held by each session (via `SessionServices`). It provides capability to +/// spawn new agents and the inter-agent communication layer. +#[derive(Clone, Default)] +pub(crate) struct AgentControl { + /// Weak handle back to the global thread registry/state. + /// This is `Weak` to avoid reference cycles and shadow persistence of the form + /// `ThreadManagerState -> CodexThread -> Session -> SessionServices -> ThreadManagerState`. + manager: Weak, +} + +impl AgentControl { + /// Construct a new `AgentControl` that can spawn/message agents via the given manager state. + pub(crate) fn new(manager: Weak) -> Self { + Self { manager } + } + + #[allow(dead_code)] // Used by upcoming multi-agent tooling. + /// Spawn a new agent thread and submit the initial prompt. + /// + /// If `headless` is true, a background drain task is spawned to prevent unbounded event growth + /// of the channel queue when there is no client actively reading the thread events. 
+ pub(crate) async fn spawn_agent( + &self, + config: crate::config::Config, + prompt: String, + headless: bool, + ) -> CodexResult { + let state = self.upgrade()?; + let new_thread = state.spawn_new_thread(config, self.clone()).await?; + + if headless { + spawn_headless_drain(Arc::clone(&new_thread.thread)); + } + + self.send_prompt(new_thread.thread_id, prompt).await?; + + Ok(new_thread.thread_id) + } + + #[allow(dead_code)] // Used by upcoming multi-agent tooling. + /// Send a `user` prompt to an existing agent thread. + pub(crate) async fn send_prompt( + &self, + agent_id: ThreadId, + prompt: String, + ) -> CodexResult { + let state = self.upgrade()?; + state + .send_op( + agent_id, + Op::UserInput { + items: vec![UserInput::Text { text: prompt }], + final_output_json_schema: None, + }, + ) + .await + } + + #[allow(dead_code)] // Used by upcoming multi-agent tooling. + /// Fetch the last known status for `agent_id`, returning `NotFound` when unavailable. + pub(crate) async fn get_status(&self, agent_id: ThreadId) -> AgentStatus { + let Ok(state) = self.upgrade() else { + // No agent available if upgrade fails. + return AgentStatus::NotFound; + }; + let Ok(thread) = state.get_thread(agent_id).await else { + return AgentStatus::NotFound; + }; + thread.agent_status().await + } + + fn upgrade(&self) -> CodexResult> { + self.manager + .upgrade() + .ok_or_else(|| CodexErr::UnsupportedOperation("thread manager dropped".to_string())) + } +} + +/// When an agent is spawned "headless" (no UI/view attached), there may be no consumer polling +/// `CodexThread::next_event()`. The underlying event channel is unbounded, so the producer can +/// accumulate events indefinitely. This drain task prevents that memory growth by polling and +/// discarding events until shutdown. 
+fn spawn_headless_drain(thread: Arc) { + tokio::spawn(async move { + loop { + match thread.next_event().await { + Ok(event) => { + if matches!(event.msg, EventMsg::ShutdownComplete) { + break; + } + } + Err(err) => { + tracing::warn!("failed to receive event from agent: {err:?}"); + break; + } + } + } + }); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent::agent_status_from_event; + use codex_protocol::protocol::ErrorEvent; + use codex_protocol::protocol::TaskCompleteEvent; + use codex_protocol::protocol::TaskStartedEvent; + use codex_protocol::protocol::TurnAbortReason; + use codex_protocol::protocol::TurnAbortedEvent; + use pretty_assertions::assert_eq; + + #[tokio::test] + async fn send_prompt_errors_when_manager_dropped() { + let control = AgentControl::default(); + let err = control + .send_prompt(ThreadId::new(), "hello".to_string()) + .await + .expect_err("send_prompt should fail without a manager"); + assert_eq!( + err.to_string(), + "unsupported operation: thread manager dropped" + ); + } + + #[tokio::test] + async fn get_status_returns_not_found_without_manager() { + let control = AgentControl::default(); + let got = control.get_status(ThreadId::new()).await; + assert_eq!(got, AgentStatus::NotFound); + } + + #[tokio::test] + async fn on_event_updates_status_from_task_started() { + let status = agent_status_from_event(&EventMsg::TaskStarted(TaskStartedEvent { + model_context_window: None, + })); + assert_eq!(status, Some(AgentStatus::Running)); + } + + #[tokio::test] + async fn on_event_updates_status_from_task_complete() { + let status = agent_status_from_event(&EventMsg::TaskComplete(TaskCompleteEvent { + last_agent_message: Some("done".to_string()), + })); + let expected = AgentStatus::Completed(Some("done".to_string())); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_error() { + let status = agent_status_from_event(&EventMsg::Error(ErrorEvent { + message: "boom".to_string(), + 
codex_error_info: None, + })); + + let expected = AgentStatus::Errored("boom".to_string()); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_turn_aborted() { + let status = agent_status_from_event(&EventMsg::TurnAborted(TurnAbortedEvent { + reason: TurnAbortReason::Interrupted, + })); + + let expected = AgentStatus::Errored("Interrupted".to_string()); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_shutdown_complete() { + let status = agent_status_from_event(&EventMsg::ShutdownComplete); + assert_eq!(status, Some(AgentStatus::Shutdown)); + } +} diff --git a/codex-rs/core/src/agent/mod.rs b/codex-rs/core/src/agent/mod.rs new file mode 100644 index 00000000000..d6348b38b3e --- /dev/null +++ b/codex-rs/core/src/agent/mod.rs @@ -0,0 +1,6 @@ +pub(crate) mod control; +pub(crate) mod status; + +pub(crate) use codex_protocol::protocol::AgentStatus; +pub(crate) use control::AgentControl; +pub(crate) use status::agent_status_from_event; diff --git a/codex-rs/core/src/agent/status.rs b/codex-rs/core/src/agent/status.rs new file mode 100644 index 00000000000..f5345a29105 --- /dev/null +++ b/codex-rs/core/src/agent/status.rs @@ -0,0 +1,15 @@ +use codex_protocol::protocol::AgentStatus; +use codex_protocol::protocol::EventMsg; + +/// Derive the next agent status from a single emitted event. +/// Returns `None` when the event does not affect status tracking. 
+pub(crate) fn agent_status_from_event(msg: &EventMsg) -> Option { + match msg { + EventMsg::TaskStarted(_) => Some(AgentStatus::Running), + EventMsg::TaskComplete(ev) => Some(AgentStatus::Completed(ev.last_agent_message.clone())), + EventMsg::TurnAborted(ev) => Some(AgentStatus::Errored(format!("{:?}", ev.reason))), + EventMsg::Error(ev) => Some(AgentStatus::Errored(ev.message.clone())), + EventMsg::ShutdownComplete => Some(AgentStatus::Shutdown), + _ => None, + } +} diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 67433303e5b..1a47ca60b7d 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -1,10 +1,9 @@ -use crate::codex::Session; use crate::codex::TurnContext; use crate::function_tool::FunctionCallError; use crate::protocol::FileChange; -use crate::protocol::ReviewDecision; use crate::safety::SafetyCheck; use crate::safety::assess_patch_safety; +use crate::tools::sandboxing::ExecApprovalRequirement; use codex_apply_patch::ApplyPatchAction; use codex_apply_patch::ApplyPatchFileChange; use std::collections::HashMap; @@ -30,13 +29,12 @@ pub(crate) enum InternalApplyPatchInvocation { #[derive(Debug)] pub(crate) struct ApplyPatchExec { pub(crate) action: ApplyPatchAction, - pub(crate) user_explicitly_approved_this_action: bool, + pub(crate) auto_approved: bool, + pub(crate) exec_approval_requirement: ExecApprovalRequirement, } pub(crate) async fn apply_patch( - sess: &Session, turn_context: &TurnContext, - call_id: &str, action: ApplyPatchAction, ) -> InternalApplyPatchInvocation { match assess_patch_safety( @@ -50,40 +48,24 @@ pub(crate) async fn apply_patch( .. 
} => InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec { action, - user_explicitly_approved_this_action: user_explicitly_approved, + auto_approved: !user_explicitly_approved, + exec_approval_requirement: ExecApprovalRequirement::Skip { + bypass_sandbox: false, + proposed_execpolicy_amendment: None, + }, }), SafetyCheck::AskUser => { - // Compute a readable summary of path changes to include in the - // approval request so the user can make an informed decision. - // - // Note that it might be worth expanding this approval request to - // give the user the option to expand the set of writable roots so - // that similar patches can be auto-approved in the future during - // this session. - let rx_approve = sess - .request_patch_approval( - turn_context, - call_id.to_owned(), - convert_apply_patch_to_protocol(&action), - None, - None, - ) - .await; - match rx_approve.await.unwrap_or_default() { - ReviewDecision::Approved - | ReviewDecision::ApprovedExecpolicyAmendment { .. } - | ReviewDecision::ApprovedForSession => { - InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec { - action, - user_explicitly_approved_this_action: true, - }) - } - ReviewDecision::Denied | ReviewDecision::Abort => { - InternalApplyPatchInvocation::Output(Err(FunctionCallError::RespondToModel( - "patch rejected by user".to_string(), - ))) - } - } + // Delegate the approval prompt (including cached approvals) to the + // tool runtime, consistent with how shell/unified_exec approvals + // are orchestrator-driven. 
+ InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec { + action, + auto_approved: false, + exec_approval_requirement: ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: None, + }, + }) } SafetyCheck::Reject { reason } => InternalApplyPatchInvocation::Output(Err( FunctionCallError::RespondToModel(format!("patch rejected: {reason}")), diff --git a/codex-rs/core/src/auth.rs b/codex-rs/core/src/auth.rs index 96714e3f74b..71a542912e5 100644 --- a/codex-rs/core/src/auth.rs +++ b/codex-rs/core/src/auth.rs @@ -32,11 +32,7 @@ use crate::token_data::parse_id_token; use crate::util::try_parse_error_message; use codex_client::CodexHttpClient; use codex_protocol::account::PlanType as AccountPlanType; -#[cfg(any(test, feature = "test-support"))] -use once_cell::sync::Lazy; use serde_json::Value; -#[cfg(any(test, feature = "test-support"))] -use tempfile::TempDir; use thiserror::Error; #[derive(Debug, Clone)] @@ -66,9 +62,6 @@ const REFRESH_TOKEN_UNKNOWN_MESSAGE: &str = const REFRESH_TOKEN_URL: &str = "https://auth.openai.com/oauth/token"; pub const REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR: &str = "CODEX_REFRESH_TOKEN_URL_OVERRIDE"; -#[cfg(any(test, feature = "test-support"))] -static TEST_AUTH_TEMP_DIRS: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); - #[derive(Debug, Error)] pub enum RefreshTokenError { #[error("{0}")] @@ -630,6 +623,155 @@ struct CachedAuth { auth: Option, } +/// Central manager providing a single source of truth for auth.json derived +/// authentication data. It loads once (or on preference change) and then +/// hands out cloned `CodexAuth` values so the rest of the program has a +/// consistent snapshot. +/// +/// External modifications to `auth.json` will NOT be observed until +/// `reload()` is called explicitly. This matches the design goal of avoiding +/// different parts of the program seeing inconsistent auth data mid‑run. 
+#[derive(Debug)] +pub struct AuthManager { + codex_home: PathBuf, + inner: RwLock, + enable_codex_api_key_env: bool, + auth_credentials_store_mode: AuthCredentialsStoreMode, +} + +impl AuthManager { + /// Create a new manager loading the initial auth using the provided + /// preferred auth method. Errors loading auth are swallowed; `auth()` will + /// simply return `None` in that case so callers can treat it as an + /// unauthenticated state. + pub fn new( + codex_home: PathBuf, + enable_codex_api_key_env: bool, + auth_credentials_store_mode: AuthCredentialsStoreMode, + ) -> Self { + let auth = load_auth( + &codex_home, + enable_codex_api_key_env, + auth_credentials_store_mode, + ) + .ok() + .flatten(); + Self { + codex_home, + inner: RwLock::new(CachedAuth { auth }), + enable_codex_api_key_env, + auth_credentials_store_mode, + } + } + + #[cfg(any(test, feature = "test-support"))] + /// Create an AuthManager with a specific CodexAuth, for testing only. + pub fn from_auth_for_testing(auth: CodexAuth) -> Arc { + let cached = CachedAuth { auth: Some(auth) }; + + Arc::new(Self { + codex_home: PathBuf::from("non-existent"), + inner: RwLock::new(cached), + enable_codex_api_key_env: false, + auth_credentials_store_mode: AuthCredentialsStoreMode::File, + }) + } + + #[cfg(any(test, feature = "test-support"))] + /// Create an AuthManager with a specific CodexAuth and codex home, for testing only. + pub fn from_auth_for_testing_with_home(auth: CodexAuth, codex_home: PathBuf) -> Arc { + let cached = CachedAuth { auth: Some(auth) }; + Arc::new(Self { + codex_home, + inner: RwLock::new(cached), + enable_codex_api_key_env: false, + auth_credentials_store_mode: AuthCredentialsStoreMode::File, + }) + } + + /// Current cached auth (clone). May be `None` if not logged in or load failed. + pub fn auth(&self) -> Option { + self.inner.read().ok().and_then(|c| c.auth.clone()) + } + + /// Force a reload of the auth information from auth.json. Returns + /// whether the auth value changed. 
+ pub fn reload(&self) -> bool { + let new_auth = load_auth( + &self.codex_home, + self.enable_codex_api_key_env, + self.auth_credentials_store_mode, + ) + .ok() + .flatten(); + if let Ok(mut guard) = self.inner.write() { + let changed = !AuthManager::auths_equal(&guard.auth, &new_auth); + guard.auth = new_auth; + changed + } else { + false + } + } + + fn auths_equal(a: &Option, b: &Option) -> bool { + match (a, b) { + (None, None) => true, + (Some(a), Some(b)) => a == b, + _ => false, + } + } + + /// Convenience constructor returning an `Arc` wrapper. + pub fn shared( + codex_home: PathBuf, + enable_codex_api_key_env: bool, + auth_credentials_store_mode: AuthCredentialsStoreMode, + ) -> Arc { + Arc::new(Self::new( + codex_home, + enable_codex_api_key_env, + auth_credentials_store_mode, + )) + } + + /// Attempt to refresh the current auth token (if any). On success, reload + /// the auth state from disk so other components observe refreshed token. + /// If the token refresh fails in a permanent (non‑transient) way, logs out + /// to clear invalid auth state. + pub async fn refresh_token(&self) -> Result, RefreshTokenError> { + let auth = match self.auth() { + Some(a) => a, + None => return Ok(None), + }; + match auth.refresh_token().await { + Ok(token) => { + // Reload to pick up persisted changes. + self.reload(); + Ok(Some(token)) + } + Err(e) => { + tracing::error!("Failed to refresh token: {}", e); + Err(e) + } + } + } + + /// Log out by deleting the on‑disk auth.json (if present). Returns Ok(true) + /// if a file was removed, Ok(false) if no auth file existed. On success, + /// reloads the in‑memory auth cache so callers immediately observe the + /// unauthenticated state. + pub fn logout(&self) -> std::io::Result { + let removed = super::auth::logout(&self.codex_home, self.auth_credentials_store_mode)?; + // Always reload to clear any cached auth (even if file absent). 
+ self.reload(); + Ok(removed) + } + + pub fn get_auth_mode(&self) -> Option { + self.auth().map(|a| a.mode) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1051,162 +1193,3 @@ mod tests { pretty_assertions::assert_eq!(auth.account_plan_type(), Some(AccountPlanType::Unknown)); } } - -/// Central manager providing a single source of truth for auth.json derived -/// authentication data. It loads once (or on preference change) and then -/// hands out cloned `CodexAuth` values so the rest of the program has a -/// consistent snapshot. -/// -/// External modifications to `auth.json` will NOT be observed until -/// `reload()` is called explicitly. This matches the design goal of avoiding -/// different parts of the program seeing inconsistent auth data mid‑run. -#[derive(Debug)] -pub struct AuthManager { - codex_home: PathBuf, - inner: RwLock, - enable_codex_api_key_env: bool, - auth_credentials_store_mode: AuthCredentialsStoreMode, -} - -impl AuthManager { - /// Create a new manager loading the initial auth using the provided - /// preferred auth method. Errors loading auth are swallowed; `auth()` will - /// simply return `None` in that case so callers can treat it as an - /// unauthenticated state. - pub fn new( - codex_home: PathBuf, - enable_codex_api_key_env: bool, - auth_credentials_store_mode: AuthCredentialsStoreMode, - ) -> Self { - let auth = load_auth( - &codex_home, - enable_codex_api_key_env, - auth_credentials_store_mode, - ) - .ok() - .flatten(); - Self { - codex_home, - inner: RwLock::new(CachedAuth { auth }), - enable_codex_api_key_env, - auth_credentials_store_mode, - } - } - - #[cfg(any(test, feature = "test-support"))] - #[expect(clippy::expect_used)] - /// Create an AuthManager with a specific CodexAuth, for testing only. 
- pub fn from_auth_for_testing(auth: CodexAuth) -> Arc { - let cached = CachedAuth { auth: Some(auth) }; - let temp_dir = tempfile::tempdir().expect("temp codex home"); - let codex_home = temp_dir.path().to_path_buf(); - TEST_AUTH_TEMP_DIRS - .lock() - .expect("lock test codex homes") - .push(temp_dir); - Arc::new(Self { - codex_home, - inner: RwLock::new(cached), - enable_codex_api_key_env: false, - auth_credentials_store_mode: AuthCredentialsStoreMode::File, - }) - } - - #[cfg(any(test, feature = "test-support"))] - /// Create an AuthManager with a specific CodexAuth and codex home, for testing only. - pub fn from_auth_for_testing_with_home(auth: CodexAuth, codex_home: PathBuf) -> Arc { - let cached = CachedAuth { auth: Some(auth) }; - Arc::new(Self { - codex_home, - inner: RwLock::new(cached), - enable_codex_api_key_env: false, - auth_credentials_store_mode: AuthCredentialsStoreMode::File, - }) - } - - /// Current cached auth (clone). May be `None` if not logged in or load failed. - pub fn auth(&self) -> Option { - self.inner.read().ok().and_then(|c| c.auth.clone()) - } - - pub fn codex_home(&self) -> &Path { - &self.codex_home - } - - /// Force a reload of the auth information from auth.json. Returns - /// whether the auth value changed. - pub fn reload(&self) -> bool { - let new_auth = load_auth( - &self.codex_home, - self.enable_codex_api_key_env, - self.auth_credentials_store_mode, - ) - .ok() - .flatten(); - if let Ok(mut guard) = self.inner.write() { - let changed = !AuthManager::auths_equal(&guard.auth, &new_auth); - guard.auth = new_auth; - changed - } else { - false - } - } - - fn auths_equal(a: &Option, b: &Option) -> bool { - match (a, b) { - (None, None) => true, - (Some(a), Some(b)) => a == b, - _ => false, - } - } - - /// Convenience constructor returning an `Arc` wrapper. 
- pub fn shared( - codex_home: PathBuf, - enable_codex_api_key_env: bool, - auth_credentials_store_mode: AuthCredentialsStoreMode, - ) -> Arc { - Arc::new(Self::new( - codex_home, - enable_codex_api_key_env, - auth_credentials_store_mode, - )) - } - - /// Attempt to refresh the current auth token (if any). On success, reload - /// the auth state from disk so other components observe refreshed token. - /// If the token refresh fails in a permanent (non‑transient) way, logs out - /// to clear invalid auth state. - pub async fn refresh_token(&self) -> Result, RefreshTokenError> { - let auth = match self.auth() { - Some(a) => a, - None => return Ok(None), - }; - match auth.refresh_token().await { - Ok(token) => { - // Reload to pick up persisted changes. - self.reload(); - Ok(Some(token)) - } - Err(e) => { - tracing::error!("Failed to refresh token: {}", e); - Err(e) - } - } - } - - /// Log out by deleting the on‑disk auth.json (if present). Returns Ok(true) - /// if a file was removed, Ok(false) if no auth file existed. On success, - /// reloads the in‑memory auth cache so callers immediately observe the - /// unauthenticated state. - pub fn logout(&self) -> std::io::Result { - let removed = super::auth::logout(&self.codex_home, self.auth_credentials_store_mode)?; - // Always reload to clear any cached auth (even if file absent). 
- self.reload(); - Ok(removed) - } - - pub fn get_auth_mode(&self) -> Option { - self.auth().map(|a| a.mode) - } -} diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 11a3c5c65f3..f1400d4e3c4 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -17,11 +17,13 @@ use codex_api::TransportError; use codex_api::common::Reasoning; use codex_api::create_text_param_for_request; use codex_api::error::ApiError; +use codex_api::requests::responses::Compression; use codex_app_server_protocol::AuthMode; -use codex_otel::otel_manager::OtelManager; -use codex_protocol::ConversationId; +use codex_otel::OtelManager; +use codex_protocol::ThreadId; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::protocol::SessionSource; use eventsource_stream::Event; @@ -46,10 +48,10 @@ use crate::default_client::build_reqwest_client; use crate::error::CodexErr; use crate::error::Result; use crate::features::FEATURES; +use crate::features::Feature; use crate::flags::CODEX_RS_SSE_FIXTURE; use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::WireApi; -use crate::models_manager::model_family::ModelFamily; use crate::tools::spec::create_tools_json_for_chat_completions_api; use crate::tools::spec::create_tools_json_for_responses_api; @@ -57,10 +59,10 @@ use crate::tools::spec::create_tools_json_for_responses_api; pub struct ModelClient { config: Arc, auth_manager: Option>, - model_family: ModelFamily, + model_info: ModelInfo, otel_manager: OtelManager, provider: ModelProviderInfo, - conversation_id: ConversationId, + conversation_id: ThreadId, effort: Option, summary: ReasoningSummaryConfig, session_source: SessionSource, @@ -71,18 +73,18 @@ impl ModelClient { pub fn new( config: Arc, auth_manager: Option>, - 
model_family: ModelFamily, + model_info: ModelInfo, otel_manager: OtelManager, provider: ModelProviderInfo, effort: Option, summary: ReasoningSummaryConfig, - conversation_id: ConversationId, + conversation_id: ThreadId, session_source: SessionSource, ) -> Self { Self { config, auth_manager, - model_family, + model_info, otel_manager, provider, conversation_id, @@ -93,11 +95,11 @@ impl ModelClient { } pub fn get_model_context_window(&self) -> Option { - let model_family = self.get_model_family(); - let effective_context_window_percent = model_family.effective_context_window_percent; - model_family - .context_window - .map(|w| w.saturating_mul(effective_context_window_percent) / 100) + let model_info = self.get_model_info(); + let effective_context_window_percent = model_info.effective_context_window_percent; + model_info.context_window.map(|context_window| { + context_window.saturating_mul(effective_context_window_percent) / 100 + }) } pub fn config(&self) -> Arc { @@ -146,8 +148,8 @@ impl ModelClient { } let auth_manager = self.auth_manager.clone(); - let model_family = self.get_model_family(); - let instructions = prompt.get_full_instructions(&model_family).into_owned(); + let model_info = self.get_model_info(); + let instructions = prompt.get_full_instructions(&model_info).into_owned(); let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?; let api_prompt = build_api_prompt(prompt, instructions, tools_json); let conversation_id = self.conversation_id.to_string(); @@ -200,13 +202,14 @@ impl ModelClient { } let auth_manager = self.auth_manager.clone(); - let model_family = self.get_model_family(); - let instructions = prompt.get_full_instructions(&model_family).into_owned(); + let model_info = self.get_model_info(); + let instructions = prompt.get_full_instructions(&model_info).into_owned(); let tools_json: Vec = create_tools_json_for_responses_api(&prompt.tools)?; - let reasoning = if model_family.supports_reasoning_summaries { + let 
default_reasoning_effort = model_info.default_reasoning_level; + let reasoning = if model_info.supports_reasoning_summaries { Some(Reasoning { - effort: self.effort.or(model_family.default_reasoning_effort), + effort: self.effort.or(default_reasoning_effort), summary: if self.summary == ReasoningSummaryConfig::None { None } else { @@ -223,15 +226,13 @@ impl ModelClient { vec![] }; - let verbosity = if model_family.support_verbosity { - self.config - .model_verbosity - .or(model_family.default_verbosity) + let verbosity = if model_info.support_verbosity { + self.config.model_verbosity.or(model_info.default_verbosity) } else { if self.config.model_verbosity.is_some() { warn!( "model_verbosity is set but ignored as the model does not support verbosity: {}", - model_family.family + model_info.slug ); } None @@ -251,6 +252,20 @@ impl ModelClient { let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?; let transport = ReqwestTransport::new(build_reqwest_client()); let (request_telemetry, sse_telemetry) = self.build_streaming_telemetry(); + let compression = if self + .config + .features + .enabled(Feature::EnableRequestCompression) + && auth + .as_ref() + .is_some_and(|auth| auth.mode == AuthMode::ChatGPT) + && self.provider.is_openai() + { + Compression::Zstd + } else { + Compression::None + }; + let client = ApiResponsesClient::new(transport, api_provider, api_auth) .with_telemetry(Some(request_telemetry), Some(sse_telemetry)); @@ -263,6 +278,7 @@ impl ModelClient { conversation_id: Some(conversation_id.clone()), session_source: Some(session_source.clone()), extra_headers: beta_feature_headers(&self.config), + compression, }; let stream_result = client @@ -298,12 +314,11 @@ impl ModelClient { /// Returns the currently configured model slug. pub fn get_model(&self) -> String { - self.get_model_family().get_model_slug().to_string() + self.model_info.slug.clone() } - /// Returns the currently configured model family. 
- pub fn get_model_family(&self) -> ModelFamily { - self.model_family.clone() + pub fn get_model_info(&self) -> ModelInfo { + self.model_info.clone() } /// Returns the current reasoning effort setting. @@ -340,7 +355,7 @@ impl ModelClient { .with_telemetry(Some(request_telemetry)); let instructions = prompt - .get_full_instructions(&self.get_model_family()) + .get_full_instructions(&self.get_model_info()) .into_owned(); let payload = ApiCompactionInput { model: &self.get_model(), diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 913bb223219..7d7cabcfa61 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -1,15 +1,13 @@ use crate::client_common::tools::ToolSpec; use crate::error::Result; -use crate::models_manager::model_family::ModelFamily; pub use codex_api::common::ResponseEvent; -use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ModelInfo; use futures::Stream; use serde::Deserialize; use serde_json::Value; use std::borrow::Cow; use std::collections::HashSet; -use std::ops::Deref; use std::pin::Pin; use std::task::Context; use std::task::Poll; @@ -44,28 +42,12 @@ pub struct Prompt { } impl Prompt { - pub(crate) fn get_full_instructions<'a>(&'a self, model: &'a ModelFamily) -> Cow<'a, str> { - let base = self - .base_instructions_override - .as_deref() - .unwrap_or(model.base_instructions.deref()); - // When there are no custom instructions, add apply_patch_tool_instructions if: - // - the model needs special instructions (4.1) - // AND - // - there is no apply_patch tool present - let is_apply_patch_tool_present = self.tools.iter().any(|tool| match tool { - ToolSpec::Function(f) => f.name == "apply_patch", - ToolSpec::Freeform(f) => f.name == "apply_patch", - _ => false, - }); - if self.base_instructions_override.is_none() - && model.needs_special_apply_patch_instructions - && 
!is_apply_patch_tool_present - { - Cow::Owned(format!("{base}\n{APPLY_PATCH_TOOL_INSTRUCTIONS}")) - } else { - Cow::Borrowed(base) - } + pub(crate) fn get_full_instructions<'a>(&'a self, model: &'a ModelInfo) -> Cow<'a, str> { + Cow::Borrowed( + self.base_instructions_override + .as_deref() + .unwrap_or(model.base_instructions.as_str()), + ) } pub(crate) fn get_formatted_input(&self) -> Vec { @@ -195,8 +177,13 @@ pub(crate) mod tools { LocalShell {}, // TODO: Understand why we get an error on web_search although the API docs say it's supported. // https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C + // The `external_web_access` field determines whether the web search is over cached or live content. + // https://platform.openai.com/docs/guides/tools-web-search#live-internet-access #[serde(rename = "web_search")] - WebSearch {}, + WebSearch { + #[serde(skip_serializing_if = "Option::is_none")] + external_web_access: Option, + }, #[serde(rename = "custom")] Freeform(FreeformTool), } @@ -206,7 +193,7 @@ pub(crate) mod tools { match self { ToolSpec::Function(tool) => tool.name.as_str(), ToolSpec::LocalShell {} => "local_shell", - ToolSpec::WebSearch {} => "web_search", + ToolSpec::WebSearch { .. 
} => "web_search", ToolSpec::Freeform(tool) => tool.name.as_str(), } } @@ -272,6 +259,8 @@ mod tests { let prompt = Prompt { ..Default::default() }; + let prompt_with_apply_patch_instructions = + include_str!("../prompt_with_apply_patch_instructions.md"); let test_cases = vec![ InstructionsTestCase { slug: "gpt-3.5", @@ -312,19 +301,16 @@ mod tests { ]; for test_case in test_cases { let config = test_config(); - let model_family = - ModelsManager::construct_model_family_offline(test_case.slug, &config); - let expected = if test_case.expects_apply_patch_instructions { - format!( - "{}\n{}", - model_family.clone().base_instructions, - APPLY_PATCH_TOOL_INSTRUCTIONS - ) - } else { - model_family.clone().base_instructions - }; - - let full = prompt.get_full_instructions(&model_family); + let model_info = ModelsManager::construct_model_info_offline(test_case.slug, &config); + if test_case.expects_apply_patch_instructions { + assert_eq!( + model_info.base_instructions.as_str(), + prompt_with_apply_patch_instructions + ); + } + + let expected = model_info.base_instructions.as_str(); + let full = prompt.get_full_instructions(&model_info); assert_eq!(full, expected); } } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 996d156f4d9..b140c0dc14b 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -8,6 +8,9 @@ use std::sync::atomic::Ordering; use crate::AuthManager; use crate::SandboxState; +use crate::agent::AgentControl; +use crate::agent::AgentStatus; +use crate::agent::agent_status_from_event; use crate::client_common::REVIEW_PROMPT; use crate::compact; use crate::compact::run_inline_auto_compact_task; @@ -17,7 +20,6 @@ use crate::exec_policy::ExecPolicyManager; use crate::features::Feature; use crate::features::Features; use crate::models_manager::manager::ModelsManager; -use crate::models_manager::model_family::ModelFamily; use crate::parse_command::parse_command; use crate::parse_turn_item; use 
crate::stream_events_utils::HandleOutputCtx; @@ -29,9 +31,10 @@ use crate::user_notification::UserNotifier; use crate::util::error_or_panic; use async_channel::Receiver; use async_channel::Sender; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::approvals::ExecPolicyAmendment; use codex_protocol::items::TurnItem; +use codex_protocol::openai_models::ModelInfo; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::HasLegacyEvent; use codex_protocol::protocol::ItemCompletedEvent; @@ -143,13 +146,13 @@ use crate::tools::sandboxing::ApprovalStore; use crate::tools::spec::ToolsConfig; use crate::tools::spec::ToolsConfigParams; use crate::turn_diff_tracker::TurnDiffTracker; -use crate::unified_exec::UnifiedExecSessionManager; +use crate::unified_exec::UnifiedExecProcessManager; use crate::user_instructions::DeveloperInstructions; use crate::user_instructions::UserInstructions; use crate::user_notification::UserNotification; use crate::util::backoff; use codex_async_utils::OrCancelExt; -use codex_otel::otel_manager::OtelManager; +use codex_otel::OtelManager; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseInputItem; @@ -167,6 +170,8 @@ pub struct Codex { pub(crate) next_id: AtomicU64, pub(crate) tx_sub: Sender, pub(crate) rx_event: Receiver, + // Last known status of the agent. + pub(crate) agent_status: Arc>, } /// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`], @@ -174,7 +179,9 @@ pub struct Codex { /// unique session id. pub struct CodexSpawnOk { pub codex: Codex, - pub conversation_id: ConversationId, + pub thread_id: ThreadId, + #[deprecated(note = "use thread_id")] + pub conversation_id: ThreadId, } pub(crate) const INITIAL_SUBMIT_ID: &str = ""; @@ -207,39 +214,34 @@ fn maybe_push_chat_wire_api_deprecation( impl Codex { /// Spawn a new [`Codex`] and initialize the session. 
- pub async fn spawn( + pub(crate) async fn spawn( config: Config, auth_manager: Arc, models_manager: Arc, skills_manager: Arc, conversation_history: InitialHistory, session_source: SessionSource, + agent_control: AgentControl, ) -> CodexResult { let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY); let (tx_event, rx_event) = async_channel::unbounded(); - let loaded_skills = config - .features - .enabled(Feature::Skills) - .then(|| skills_manager.skills_for_cwd(&config.cwd)); - - if let Some(outcome) = &loaded_skills { - for err in &outcome.errors { - error!( - "failed to load skill {}: {}", - err.path.display(), - err.message - ); - } + let loaded_skills = skills_manager.skills_for_config(&config); + // let loaded_skills = if config.features.enabled(Feature::Skills) { + // Some(skills_manager.skills_for_config(&config)) + // } else { + // None + // }; + + for err in &loaded_skills.errors { + error!( + "failed to load skill {}: {}", + err.path.display(), + err.message + ); } - let user_instructions = get_user_instructions( - &config, - loaded_skills - .as_ref() - .map(|outcome| outcome.skills.as_slice()), - ) - .await; + let user_instructions = get_user_instructions(&config, Some(&loaded_skills.skills)).await; let exec_policy = ExecPolicyManager::load(&config.features, &config.config_layer_stack) .await @@ -272,6 +274,7 @@ impl Codex { // Generate a unique ID for the lifetime of this Codex session. 
let session_source_clone = session_configuration.session_source.clone(); + let agent_status = Arc::new(RwLock::new(AgentStatus::PendingInit)); let session = Session::new( session_configuration, @@ -280,16 +283,18 @@ impl Codex { models_manager.clone(), exec_policy, tx_event.clone(), + Arc::clone(&agent_status), conversation_history, session_source_clone, skills_manager, + agent_control, ) .await .map_err(|e| { error!("Failed to create session: {e:#}"); map_session_init_error(&e, &config.codex_home) })?; - let conversation_id = session.conversation_id; + let thread_id = session.conversation_id; // This task will run until Op::Shutdown is received. tokio::spawn(submission_loop(session, config, rx_sub)); @@ -297,11 +302,14 @@ impl Codex { next_id: AtomicU64::new(0), tx_sub, rx_event, + agent_status, }; + #[allow(deprecated)] Ok(CodexSpawnOk { codex, - conversation_id, + thread_id, + conversation_id: thread_id, }) } @@ -334,14 +342,20 @@ impl Codex { .map_err(|_| CodexErr::InternalAgentDied)?; Ok(event) } + + pub(crate) async fn agent_status(&self) -> AgentStatus { + let status = self.agent_status.read().await; + status.clone() + } } /// Context for an initialized model agent /// /// A session has at most 1 running task at a time, and can be interrupted by user input. pub(crate) struct Session { - conversation_id: ConversationId, + conversation_id: ThreadId, tx_event: Sender, + agent_status: Arc>, state: Mutex, /// The set of enabled features should be invariant for the lifetime of the /// session. @@ -351,7 +365,7 @@ pub(crate) struct Session { next_internal_sub_id: AtomicU64, } -/// The context needed for a single turn of the conversation. +/// The context needed for a single turn of the thread. 
#[derive(Debug)] pub(crate) struct TurnContext { pub(crate) sub_id: String, @@ -487,20 +501,20 @@ impl Session { provider: ModelProviderInfo, session_configuration: &SessionConfiguration, per_turn_config: Config, - model_family: ModelFamily, - conversation_id: ConversationId, + model_info: ModelInfo, + conversation_id: ThreadId, sub_id: String, ) -> TurnContext { let otel_manager = otel_manager.clone().with_model( session_configuration.model.as_str(), - model_family.get_model_slug(), + model_info.slug.as_str(), ); let per_turn_config = Arc::new(per_turn_config); let client = ModelClient::new( per_turn_config.clone(), auth_manager, - model_family.clone(), + model_info.clone(), otel_manager, provider, session_configuration.model_reasoning_effort, @@ -510,7 +524,7 @@ impl Session { ); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &per_turn_config.features, }); @@ -530,10 +544,7 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new( - per_turn_config.as_ref(), - model_family.truncation_policy, - ), + truncation_policy: model_info.truncation_policy.into(), } } @@ -545,9 +556,11 @@ impl Session { models_manager: Arc, exec_policy: ExecPolicyManager, tx_event: Sender, + agent_status: Arc>, initial_history: InitialHistory, session_source: SessionSource, skills_manager: Arc, + agent_control: AgentControl, ) -> anyhow::Result> { debug!( "Configuring session: model={}; provider={:?}", @@ -562,7 +575,7 @@ impl Session { let (conversation_id, rollout_params) = match &initial_history { InitialHistory::New | InitialHistory::Forked(_) => { - let conversation_id = ConversationId::default(); + let conversation_id = ThreadId::default(); ( conversation_id, RolloutRecorderParams::new( @@ -620,7 +633,6 @@ impl Session { } 
maybe_push_chat_wire_api_deprecation(&config, &mut post_session_configured_events); - // todo(aibrahim): why are we passing model here while it can change? let otel_manager = OtelManager::new( conversation_id, session_configuration.model.as_str(), @@ -632,6 +644,7 @@ impl Session { terminal::user_agent(), session_configuration.session_source.clone(), ); + config.features.emit_metrics(&otel_manager); otel_manager.conversation_starts( config.model_provider.name.as_str(), @@ -658,7 +671,7 @@ impl Session { let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: CancellationToken::new(), - unified_exec_manager: UnifiedExecSessionManager::default(), + unified_exec_manager: UnifiedExecProcessManager::default(), notifier: UserNotifier::new(config.notify.clone()), rollout: Mutex::new(Some(rollout_recorder)), user_shell: Arc::new(default_shell), @@ -669,11 +682,13 @@ impl Session { models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, + agent_control, }; let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), + agent_status: Arc::clone(&agent_status), state: Mutex::new(state), features: config.features.clone(), active_turn: Mutex::new(None), @@ -919,10 +934,10 @@ impl Session { } } - let model_family = self + let model_info = self .services .models_manager - .construct_model_family(session_configuration.model.as_str(), &per_turn_config) + .construct_model_info(session_configuration.model.as_str(), &per_turn_config) .await; let mut turn_context: TurnContext = Self::make_turn_context( Some(Arc::clone(&self.services.auth_manager)), @@ -930,7 +945,7 @@ impl Session { session_configuration.provider.clone(), &session_configuration, per_turn_config, - model_family, + model_info, self.conversation_id, sub_id, ); @@ -994,6 +1009,11 @@ impl Session { } pub(crate) async fn send_event_raw(&self, event: Event) { + // 
Record the last known agent status. + if let Some(status) = agent_status_from_event(&event.msg) { + let mut guard = self.agent_status.write().await; + *guard = status; + } // Persist the event into rollout (recorder filters as needed) let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())]; self.persist_rollout_items(&rollout_items).await; @@ -1002,6 +1022,25 @@ impl Session { } } + /// Persist the event to the rollout file, flush it, and only then deliver it to clients. + /// + /// Most events can be delivered immediately after queueing the rollout write, but some + /// clients (e.g. app-server thread/rollback) re-read the rollout file synchronously on + /// receipt of the event and depend on the marker already being visible on disk. + pub(crate) async fn send_event_raw_flushed(&self, event: Event) { + // Record the last known agent status. + if let Some(status) = agent_status_from_event(&event.msg) { + let mut guard = self.agent_status.write().await; + *guard = status; + } + self.persist_rollout_items(&[RolloutItem::EventMsg(event.msg.clone())]) + .await; + self.flush_rollout().await; + if let Err(e) = self.tx_event.send(event).await { + error!("failed to send tool call event: {e}"); + } + } + pub(crate) async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) { self.send_event( turn_context, @@ -1219,6 +1258,9 @@ impl Session { history.replace(rebuilt); } } + RolloutItem::EventMsg(EventMsg::ThreadRolledBack(rollback)) => { + history.drop_last_n_user_turns(rollback.num_turns); + } _ => {} } } @@ -1236,10 +1278,6 @@ impl Session { } pub(crate) async fn record_model_warning(&self, message: impl Into, ctx: &TurnContext) { - if !self.enabled(Feature::ModelWarnings) { - return; - } - let item = ResponseItem::Message { id: None, role: "user".to_string(), @@ -1396,14 +1434,11 @@ impl Session { } pub(crate) async fn set_total_tokens_full(&self, turn_context: &TurnContext) { - let context_window = 
turn_context.client.get_model_context_window(); - if let Some(context_window) = context_window { - { - let mut state = self.state.lock().await; - state.set_token_usage_full(context_window); - } - self.send_token_count_event(turn_context).await; + if let Some(context_window) = turn_context.client.get_model_context_window() { + let mut state = self.state.lock().await; + state.set_token_usage_full(context_window); } + self.send_token_count_event(turn_context).await; } pub(crate) async fn record_response_item_and_emit_turn_item( @@ -1658,6 +1693,15 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv Op::Compact => { handlers::compact(&sess, sub.id.clone()).await; } + Op::SetSessionTitle { title } => { + handlers::set_session_title(&sess, sub.id.clone(), title).await; + } + Op::ThreadRollback { num_turns } => { + handlers::thread_rollback(&sess, sub.id.clone(), num_turns).await; + } + Op::SetSessionName { name } => { + handlers::set_session_name(&sess, sub.id.clone(), name).await; + } Op::RunUserShellCommand { command } => { handlers::run_user_shell_command( &sess, @@ -1696,7 +1740,7 @@ mod handlers { use crate::codex::spawn_review_thread; use crate::config::Config; - use crate::features::Feature; + use crate::mcp::auth::compute_auth_statuses; use crate::mcp::collect_mcp_snapshot_from_manager; use crate::review_prompts::resolve_review_request; @@ -1715,6 +1759,7 @@ mod handlers { use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::SkillsListEntry; + use codex_protocol::protocol::ThreadRolledBackEvent; use codex_protocol::protocol::TurnAbortReason; use codex_protocol::protocol::WarningEvent; @@ -1776,7 +1821,16 @@ mod handlers { final_output_json_schema: Some(final_output_json_schema), }, ), - Op::UserInput { items } => (items, SessionSettingsUpdate::default()), + Op::UserInput { + items, + final_output_json_schema, + } => ( + items, + SessionSettingsUpdate { + final_output_json_schema: 
Some(final_output_json_schema), + ..Default::default() + }, + ), _ => unreachable!(), }; @@ -1976,29 +2030,20 @@ mod handlers { } else { cwds }; - let skills = if sess.enabled(Feature::Skills) { - let skills_manager = &sess.services.skills_manager; - cwds.into_iter() - .map(|cwd| { - let outcome = skills_manager.skills_for_cwd_with_options(&cwd, force_reload); - let errors = super::errors_to_info(&outcome.errors); - let skills = super::skills_to_info(&outcome.skills); - SkillsListEntry { - cwd, - skills, - errors, - } - }) - .collect() - } else { - cwds.into_iter() - .map(|cwd| SkillsListEntry { - cwd, - skills: Vec::new(), - errors: Vec::new(), - }) - .collect() - }; + + let skills_manager = &sess.services.skills_manager; + let mut skills = Vec::new(); + for cwd in cwds { + let outcome = skills_manager.skills_for_cwd(&cwd, force_reload).await; + let errors = super::errors_to_info(&outcome.errors); + let skills_metadata = super::skills_to_info(&outcome.skills); + skills.push(SkillsListEntry { + cwd, + skills: skills_metadata, + errors, + }); + } + let event = Event { id: sub_id, msg: EventMsg::ListSkillsResponse(ListSkillsResponseEvent { skills }), @@ -2025,11 +2070,108 @@ mod handlers { .await; } + pub async fn set_session_title(sess: &Arc, sub_id: String, title: String) { + let title = title.trim().to_string(); + if title.is_empty() { + let event = Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: "Session title cannot be empty.".to_string(), + codex_error_info: Some(CodexErrorInfo::BadRequest), + }), + }; + sess.send_event_raw(event).await; + return; + } + } + + pub async fn thread_rollback(sess: &Arc, sub_id: String, num_turns: u32) { + if num_turns == 0 { + sess.send_event_raw(Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: "num_turns must be >= 1".to_string(), + codex_error_info: Some(CodexErrorInfo::ThreadRollbackFailed), + }), + }) + .await; + return; + } + + let has_active_turn = { 
sess.active_turn.lock().await.is_some() }; + if has_active_turn { + sess.send_event_raw(Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: "Cannot rollback while a turn is in progress.".to_string(), + codex_error_info: Some(CodexErrorInfo::ThreadRollbackFailed), + }), + }) + .await; + return; + } + + let turn_context = sess.new_default_turn_with_sub_id(sub_id).await; + + let mut history = sess.clone_history().await; + history.drop_last_n_user_turns(num_turns); + sess.replace_history(history.get_history()).await; + sess.recompute_token_usage(turn_context.as_ref()).await; + + sess.send_event_raw_flushed(Event { + id: turn_context.sub_id.clone(), + msg: EventMsg::ThreadRolledBack(ThreadRolledBackEvent { num_turns }), + }) + .await; + } + + pub async fn set_session_name(sess: &Arc, sub_id: String, name: String) { + let name = name.trim().to_string(); + if name.is_empty() { + let event = Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: "Session name cannot be empty.".to_string(), + codex_error_info: Some(CodexErrorInfo::BadRequest), + }), + }; + sess.send_event_raw(event).await; + return; + } + + let recorder = { + let guard = sess.services.rollout.lock().await; + guard.clone() + }; + let Some(recorder) = recorder else { + let event = Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: "Session persistence is disabled; cannot rename session.".to_string(), + codex_error_info: Some(CodexErrorInfo::Other), + }), + }; + sess.send_event_raw(event).await; + return; + }; + + if let Err(e) = recorder.set_session_name(name).await { + let event = Event { + id: sub_id, + msg: EventMsg::Error(ErrorEvent { + message: format!("Failed to set session name: {e}"), + codex_error_info: Some(CodexErrorInfo::Other), + }), + }; + sess.send_event_raw(event).await; + } + } + pub async fn shutdown(sess: &Arc, sub_id: String) -> bool { sess.abort_all_tasks(TurnAbortReason::Interrupted).await; sess.services .unified_exec_manager - 
.terminate_all_sessions() + .terminate_all_processes() .await; info!("Shutting down Codex instance"); @@ -2102,18 +2244,18 @@ async fn spawn_review_thread( resolved: crate::review_prompts::ResolvedReviewRequest, ) { let model = config.review_model.clone(); - let review_model_family = sess + let review_model_info = sess .services .models_manager - .construct_model_family(&model, &config) + .construct_model_info(&model, &config) .await; // For reviews, disable web_search and view_image regardless of global settings. let mut review_features = sess.features.clone(); review_features .disable(crate::features::Feature::WebSearchRequest) - .disable(crate::features::Feature::ViewImageTool); + .disable(crate::features::Feature::WebSearchCached); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &review_model_family, + model_info: &review_model_info, features: &review_features, }); @@ -2121,7 +2263,7 @@ async fn spawn_review_thread( let review_prompt = resolved.prompt.clone(); let provider = parent_turn_context.client.get_provider(); let auth_manager = parent_turn_context.client.get_auth_manager(); - let model_family = review_model_family.clone(); + let model_info = review_model_info.clone(); // Build per‑turn client with the requested model/family. 
let mut per_turn_config = (*config).clone(); @@ -2131,14 +2273,14 @@ async fn spawn_review_thread( let otel_manager = parent_turn_context.client.get_otel_manager().with_model( config.review_model.as_str(), - review_model_family.slug.as_str(), + review_model_info.slug.as_str(), ); let per_turn_config = Arc::new(per_turn_config); let client = ModelClient::new( per_turn_config.clone(), auth_manager, - model_family.clone(), + model_info.clone(), otel_manager, provider, per_turn_config.model_reasoning_effort, @@ -2163,7 +2305,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new(&per_turn_config, model_family.truncation_policy), + truncation_policy: model_info.truncation_policy.into(), }; // Seed the child task with the review prompt as the initial user message. @@ -2229,11 +2371,8 @@ pub(crate) async fn run_task( return None; } - let auto_compact_limit = turn_context - .client - .get_model_family() - .auto_compact_token_limit() - .unwrap_or(i64::MAX); + let model_info = turn_context.client.get_model_info(); + let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX); let total_usage_tokens = sess.get_total_token_usage().await; if total_usage_tokens >= auto_compact_limit { run_auto_compact(&sess, &turn_context).await; @@ -2243,11 +2382,12 @@ pub(crate) async fn run_task( }); sess.send_event(&turn_context, event).await; - let skills_outcome = sess.enabled(Feature::Skills).then(|| { + let skills_outcome = Some( sess.services .skills_manager - .skills_for_cwd(&turn_context.cwd) - }); + .skills_for_cwd(&turn_context.cwd, false) + .await, + ); let SkillInjections { items: skill_items, @@ -2406,20 +2546,20 @@ async fn run_turn( let model_supports_parallel = turn_context .client - .get_model_family() + .get_model_info() .supports_parallel_tool_calls; let prompt = Prompt { 
input, tools: router.specs(), - parallel_tool_calls: model_supports_parallel && sess.enabled(Feature::ParallelToolCalls), + parallel_tool_calls: model_supports_parallel, base_instructions_override: turn_context.base_instructions.clone(), output_schema: turn_context.final_output_json_schema.clone(), }; let mut retries = 0; loop { - match try_run_turn( + let err = match try_run_turn( Arc::clone(&router), Arc::clone(&sess), Arc::clone(&turn_context), @@ -2429,17 +2569,10 @@ async fn run_turn( ) .await { - // todo(aibrahim): map special cases and ? on other errors Ok(output) => return Ok(output), - Err(CodexErr::TurnAborted) => { - return Err(CodexErr::TurnAborted); - } - Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted), - Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)), - Err(e @ CodexErr::Fatal(_)) => return Err(e), - Err(e @ CodexErr::ContextWindowExceeded) => { + Err(CodexErr::ContextWindowExceeded) => { sess.set_total_tokens_full(&turn_context).await; - return Err(e); + return Err(CodexErr::ContextWindowExceeded); } Err(CodexErr::UsageLimitReached(e)) => { let rate_limits = e.rate_limits.clone(); @@ -2448,39 +2581,38 @@ async fn run_turn( } return Err(CodexErr::UsageLimitReached(e)); } - Err(CodexErr::UsageNotIncluded) => return Err(CodexErr::UsageNotIncluded), - Err(e @ CodexErr::QuotaExceeded) => return Err(e), - Err(e @ CodexErr::InvalidImageRequest()) => return Err(e), - Err(e @ CodexErr::InvalidRequest(_)) => return Err(e), - Err(e @ CodexErr::RefreshTokenFailed(_)) => return Err(e), - Err(e) => { - // Use the configured provider-specific stream retry budget. 
- let max_retries = turn_context.client.get_provider().stream_max_retries(); - if retries < max_retries { - retries += 1; - let delay = match e { - CodexErr::Stream(_, Some(delay)) => delay, - _ => backoff(retries), - }; - warn!( - "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...", - ); + Err(err) => err, + }; - // Surface retry information to any UI/front‑end so the - // user understands what is happening instead of staring - // at a seemingly frozen screen. - sess.notify_stream_error( - &turn_context, - format!("Reconnecting... {retries}/{max_retries}"), - e, - ) - .await; + if !err.is_retryable() { + return Err(err); + } - tokio::time::sleep(delay).await; - } else { - return Err(e); + // Use the configured provider-specific stream retry budget. + let max_retries = turn_context.client.get_provider().stream_max_retries(); + if retries < max_retries { + retries += 1; + let delay = match &err { + CodexErr::Stream(_, requested_delay) => { + requested_delay.unwrap_or_else(|| backoff(retries)) } - } + _ => backoff(retries), + }; + warn!("stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",); + + // Surface retry information to any UI/front‑end so the + // user understands what is happening instead of staring + // at a seemingly frozen screen. + sess.notify_stream_error( + &turn_context, + format!("Reconnecting... 
{retries}/{max_retries}"), + err, + ) + .await; + + tokio::time::sleep(delay).await; + } else { + return Err(err); } } } @@ -2833,7 +2965,7 @@ mod tests { session .record_initial_history(InitialHistory::Resumed(ResumedHistory { - conversation_id: ConversationId::default(), + conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) @@ -2910,7 +3042,7 @@ mod tests { session .record_initial_history(InitialHistory::Resumed(ResumedHistory { - conversation_id: ConversationId::default(), + conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) @@ -2933,6 +3065,131 @@ mod tests { assert_eq!(expected, actual); } + #[tokio::test] + async fn thread_rollback_drops_last_turn_from_history() { + let (sess, tc, rx) = make_session_and_context_with_rx().await; + + let initial_context = sess.build_initial_context(tc.as_ref()); + sess.record_into_history(&initial_context, tc.as_ref()) + .await; + + let turn_1 = vec![ + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "turn 1 user".to_string(), + }], + }, + ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![ContentItem::OutputText { + text: "turn 1 assistant".to_string(), + }], + }, + ]; + sess.record_into_history(&turn_1, tc.as_ref()).await; + + let turn_2 = vec![ + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "turn 2 user".to_string(), + }], + }, + ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![ContentItem::OutputText { + text: "turn 2 assistant".to_string(), + }], + }, + ]; + sess.record_into_history(&turn_2, tc.as_ref()).await; + + handlers::thread_rollback(&sess, "sub-1".to_string(), 1).await; + + let rollback_event = wait_for_thread_rolled_back(&rx).await; + assert_eq!(rollback_event.num_turns, 1); + + let mut 
expected = Vec::new(); + expected.extend(initial_context); + expected.extend(turn_1); + + let actual = sess.clone_history().await.get_history(); + assert_eq!(expected, actual); + } + + #[tokio::test] + async fn thread_rollback_clears_history_when_num_turns_exceeds_existing_turns() { + let (sess, tc, rx) = make_session_and_context_with_rx().await; + + let initial_context = sess.build_initial_context(tc.as_ref()); + sess.record_into_history(&initial_context, tc.as_ref()) + .await; + + let turn_1 = vec![ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "turn 1 user".to_string(), + }], + }]; + sess.record_into_history(&turn_1, tc.as_ref()).await; + + handlers::thread_rollback(&sess, "sub-1".to_string(), 99).await; + + let rollback_event = wait_for_thread_rolled_back(&rx).await; + assert_eq!(rollback_event.num_turns, 99); + + let actual = sess.clone_history().await.get_history(); + assert_eq!(initial_context, actual); + } + + #[tokio::test] + async fn thread_rollback_fails_when_turn_in_progress() { + let (sess, tc, rx) = make_session_and_context_with_rx().await; + + let initial_context = sess.build_initial_context(tc.as_ref()); + sess.record_into_history(&initial_context, tc.as_ref()) + .await; + + *sess.active_turn.lock().await = Some(crate::state::ActiveTurn::default()); + handlers::thread_rollback(&sess, "sub-1".to_string(), 1).await; + + let error_event = wait_for_thread_rollback_failed(&rx).await; + assert_eq!( + error_event.codex_error_info, + Some(CodexErrorInfo::ThreadRollbackFailed) + ); + + let actual = sess.clone_history().await.get_history(); + assert_eq!(initial_context, actual); + } + + #[tokio::test] + async fn thread_rollback_fails_when_num_turns_is_zero() { + let (sess, tc, rx) = make_session_and_context_with_rx().await; + + let initial_context = sess.build_initial_context(tc.as_ref()); + sess.record_into_history(&initial_context, tc.as_ref()) + .await; + + handlers::thread_rollback(&sess, 
"sub-1".to_string(), 0).await; + + let error_event = wait_for_thread_rollback_failed(&rx).await; + assert_eq!(error_event.message, "num_turns must be >= 1"); + assert_eq!( + error_event.codex_error_info, + Some(CodexErrorInfo::ThreadRollbackFailed) + ); + + let actual = sess.clone_history().await.get_history(); + assert_eq!(initial_context, actual); + } + #[tokio::test] async fn set_rate_limits_retains_previous_credits() { let codex_home = tempfile::tempdir().expect("create temp dir"); @@ -3166,6 +3423,44 @@ mod tests { assert_eq!(expected, got); } + async fn wait_for_thread_rolled_back( + rx: &async_channel::Receiver, + ) -> crate::protocol::ThreadRolledBackEvent { + let deadline = StdDuration::from_secs(2); + let start = std::time::Instant::now(); + loop { + let remaining = deadline.saturating_sub(start.elapsed()); + let evt = tokio::time::timeout(remaining, rx.recv()) + .await + .expect("timeout waiting for event") + .expect("event"); + match evt.msg { + EventMsg::ThreadRolledBack(payload) => return payload, + _ => continue, + } + } + } + + async fn wait_for_thread_rollback_failed(rx: &async_channel::Receiver) -> ErrorEvent { + let deadline = StdDuration::from_secs(2); + let start = std::time::Instant::now(); + loop { + let remaining = deadline.saturating_sub(start.elapsed()); + let evt = tokio::time::timeout(remaining, rx.recv()) + .await + .expect("timeout waiting for event") + .expect("event"); + match evt.msg { + EventMsg::Error(payload) + if payload.codex_error_info == Some(CodexErrorInfo::ThreadRollbackFailed) => + { + return payload; + } + _ => continue, + } + } + } + fn text_block(s: &str) -> ContentBlock { ContentBlock::TextContent(TextContent { annotations: None, @@ -3183,15 +3478,15 @@ mod tests { } fn otel_manager( - conversation_id: ConversationId, + conversation_id: ThreadId, config: &Config, - model_family: &ModelFamily, + model_info: &ModelInfo, session_source: SessionSource, ) -> OtelManager { OtelManager::new( conversation_id, 
ModelsManager::get_model_offline(config.model.as_deref()).as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), Some(AuthMode::ChatGPT), @@ -3206,11 +3501,16 @@ mod tests { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); - let conversation_id = ConversationId::default(); + let conversation_id = ThreadId::default(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); - let models_manager = Arc::new(ModelsManager::new(auth_manager.clone())); + let models_manager = Arc::new(ModelsManager::new( + config.codex_home.clone(), + auth_manager.clone(), + )); + let agent_control = AgentControl::default(); let exec_policy = ExecPolicyManager::default(); + let agent_status = Arc::new(RwLock::new(AgentStatus::PendingInit)); let model = ModelsManager::get_model_offline(config.model.as_deref()); let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), @@ -3228,14 +3528,14 @@ mod tests { session_source: SessionSource::Exec, }; let per_turn_config = Session::build_per_turn_config(&session_configuration); - let model_family = ModelsManager::construct_model_family_offline( + let model_info = ModelsManager::construct_model_info_offline( session_configuration.model.as_str(), &per_turn_config, ); let otel_manager = otel_manager( conversation_id, config.as_ref(), - &model_family, + &model_info, session_configuration.session_source.clone(), ); @@ -3245,7 +3545,7 @@ mod tests { let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: CancellationToken::new(), - unified_exec_manager: UnifiedExecSessionManager::default(), + unified_exec_manager: UnifiedExecProcessManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), user_shell: 
Arc::new(default_user_shell()), @@ -3256,6 +3556,7 @@ mod tests { models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, + agent_control, }; let turn_context = Session::make_turn_context( @@ -3264,7 +3565,7 @@ mod tests { session_configuration.provider.clone(), &session_configuration, per_turn_config, - model_family, + model_info, conversation_id, "turn_id".to_string(), ); @@ -3272,6 +3573,7 @@ mod tests { let session = Session { conversation_id, tx_event, + agent_status: Arc::clone(&agent_status), state: Mutex::new(state), features: config.features.clone(), active_turn: Mutex::new(None), @@ -3293,11 +3595,16 @@ mod tests { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); - let conversation_id = ConversationId::default(); + let conversation_id = ThreadId::default(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); - let models_manager = Arc::new(ModelsManager::new(auth_manager.clone())); + let models_manager = Arc::new(ModelsManager::new( + config.codex_home.clone(), + auth_manager.clone(), + )); + let agent_control = AgentControl::default(); let exec_policy = ExecPolicyManager::default(); + let agent_status = Arc::new(RwLock::new(AgentStatus::PendingInit)); let model = ModelsManager::get_model_offline(config.model.as_deref()); let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), @@ -3315,14 +3622,14 @@ mod tests { session_source: SessionSource::Exec, }; let per_turn_config = Session::build_per_turn_config(&session_configuration); - let model_family = ModelsManager::construct_model_family_offline( + let model_info = ModelsManager::construct_model_info_offline( session_configuration.model.as_str(), &per_turn_config, ); let otel_manager = otel_manager( conversation_id, config.as_ref(), - &model_family, + &model_info, 
session_configuration.session_source.clone(), ); @@ -3332,7 +3639,7 @@ mod tests { let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: CancellationToken::new(), - unified_exec_manager: UnifiedExecSessionManager::default(), + unified_exec_manager: UnifiedExecProcessManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), user_shell: Arc::new(default_user_shell()), @@ -3343,6 +3650,7 @@ mod tests { models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, + agent_control, }; let turn_context = Arc::new(Session::make_turn_context( @@ -3351,7 +3659,7 @@ mod tests { session_configuration.provider.clone(), &session_configuration, per_turn_config, - model_family, + model_info, conversation_id, "turn_id".to_string(), )); @@ -3359,6 +3667,7 @@ mod tests { let session = Arc::new(Session { conversation_id, tx_event, + agent_status: Arc::clone(&agent_status), state: Mutex::new(state), features: config.features.clone(), active_turn: Mutex::new(None), @@ -3372,12 +3681,11 @@ mod tests { #[tokio::test] async fn record_model_warning_appends_user_message() { let (mut session, turn_context) = make_session_and_context().await; - let mut features = Features::with_defaults(); - features.enable(Feature::ModelWarnings); + let features = Features::with_defaults(); session.features = features; session - .record_model_warning("too many unified exec sessions", &turn_context) + .record_model_warning("too many unified exec processes", &turn_context) .await; let mut history = session.clone_history().await; @@ -3390,7 +3698,7 @@ mod tests { assert_eq!( content, &vec![ContentItem::InputText { - text: "Warning: too many unified exec sessions".to_string(), + text: "Warning: too many unified exec processes".to_string(), }] ); } diff --git a/codex-rs/core/src/codex_delegate.rs b/codex-rs/core/src/codex_delegate.rs index 
a7e70ff2345..72c2911bfe8 100644 --- a/codex-rs/core/src/codex_delegate.rs +++ b/codex-rs/core/src/codex_delegate.rs @@ -28,12 +28,12 @@ use crate::error::CodexErr; use crate::models_manager::manager::ModelsManager; use codex_protocol::protocol::InitialHistory; -/// Start an interactive sub-Codex conversation and return IO channels. +/// Start an interactive sub-Codex thread and return IO channels. /// /// The returned `events_rx` yields non-approval events emitted by the sub-agent. /// Approval requests are handled via `parent_session` and are not surfaced. /// The returned `ops_tx` allows the caller to submit additional `Op`s to the sub-agent. -pub(crate) async fn run_codex_conversation_interactive( +pub(crate) async fn run_codex_thread_interactive( config: Config, auth_manager: Arc, models_manager: Arc, @@ -52,6 +52,7 @@ pub(crate) async fn run_codex_conversation_interactive( Arc::clone(&parent_session.services.skills_manager), initial_history.unwrap_or(InitialHistory::New), SessionSource::SubAgent(SubAgentSource::Review), + parent_session.services.agent_control.clone(), ) .await?; let codex = Arc::new(codex); @@ -86,6 +87,7 @@ pub(crate) async fn run_codex_conversation_interactive( next_id: AtomicU64::new(0), tx_sub: tx_ops, rx_event: rx_sub, + agent_status: Arc::clone(&codex.agent_status), }) } @@ -93,7 +95,7 @@ pub(crate) async fn run_codex_conversation_interactive( /// /// Internally calls the interactive variant, then immediately submits the provided input. #[allow(clippy::too_many_arguments)] -pub(crate) async fn run_codex_conversation_one_shot( +pub(crate) async fn run_codex_thread_one_shot( config: Config, auth_manager: Arc, models_manager: Arc, @@ -106,7 +108,7 @@ pub(crate) async fn run_codex_conversation_one_shot( // Use a child token so we can stop the delegate after completion without // requiring the caller to cancel the parent token. 
let child_cancel = cancel_token.child_token(); - let io = run_codex_conversation_interactive( + let io = run_codex_thread_interactive( config, auth_manager, models_manager, @@ -118,11 +120,16 @@ pub(crate) async fn run_codex_conversation_one_shot( .await?; // Send the initial input to kick off the one-shot turn. - io.submit(Op::UserInput { items: input }).await?; + io.submit(Op::UserInput { + items: input, + final_output_json_schema: None, + }) + .await?; // Bridge events so we can observe completion and shut down automatically. let (tx_bridge, rx_bridge) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY); let ops_tx = io.tx_sub.clone(); + let agent_status = Arc::clone(&io.agent_status); let io_for_bridge = io; tokio::spawn(async move { while let Ok(event) = io_for_bridge.next_event().await { @@ -154,6 +161,7 @@ pub(crate) async fn run_codex_conversation_one_shot( next_id: AtomicU64::new(0), rx_event: rx_bridge, tx_sub: tx_closed, + agent_status, }) } @@ -368,6 +376,7 @@ mod tests { next_id: AtomicU64::new(0), tx_sub, rx_event: rx_events, + agent_status: Default::default(), }); let (session, ctx, _rx_evt) = crate::codex::make_session_and_context_with_rx().await; diff --git a/codex-rs/core/src/codex_conversation.rs b/codex-rs/core/src/codex_thread.rs similarity index 79% rename from codex-rs/core/src/codex_conversation.rs rename to codex-rs/core/src/codex_thread.rs index 5bb9c97c5b9..e8a37993030 100644 --- a/codex-rs/core/src/codex_conversation.rs +++ b/codex-rs/core/src/codex_thread.rs @@ -1,3 +1,4 @@ +use crate::agent::AgentStatus; use crate::codex::Codex; use crate::error::Result as CodexResult; use crate::protocol::Event; @@ -5,14 +6,14 @@ use crate::protocol::Op; use crate::protocol::Submission; use std::path::PathBuf; -pub struct CodexConversation { +pub struct CodexThread { codex: Codex, rollout_path: PathBuf, } -/// Conduit for the bidirectional stream of messages that compose a conversation -/// in Codex. 
-impl CodexConversation { +/// Conduit for the bidirectional stream of messages that compose a thread +/// (formerly called a conversation) in Codex. +impl CodexThread { pub(crate) fn new(codex: Codex, rollout_path: PathBuf) -> Self { Self { codex, @@ -33,6 +34,10 @@ impl CodexConversation { self.codex.next_event().await } + pub async fn agent_status(&self) -> AgentStatus { + self.codex.agent_status().await + } + pub fn rollout_path(&self) -> PathBuf { self.rollout_path.clone() } diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 77b9303e920..b608cfa4672 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -108,7 +108,7 @@ async fn run_compact_task_inner( sess.notify_background_event( turn_context.as_ref(), format!( - "Trimmed {truncated_count} older conversation item(s) before compacting so the prompt fits the model context window." + "Trimmed {truncated_count} older thread item(s) before compacting so the prompt fits the model context window." ), ) .await; @@ -182,7 +182,7 @@ async fn run_compact_task_inner( sess.send_event(&turn_context, event).await; let warning = EventMsg::Warning(WarningEvent { - message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(), + message: "Heads up: Long threads and multiple compactions can cause the model to be less accurate. Start a new thread when possible to keep threads small and targeted.".to_string(), }); sess.send_event(&turn_context, warning).await; } diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 53864851ae0..21139b47d4f 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -268,7 +268,6 @@ pub struct Config { /// Additional filenames to try when looking for project-level docs. 
pub project_doc_fallback_filenames: Vec, - // todo(aibrahim): this should be used in the override model family /// Token budget applied when storing tool/function outputs in the context manager. pub tool_output_token_limit: Option, @@ -316,7 +315,7 @@ pub struct Config { /// Include the `apply_patch` tool for models that benefit from invoking /// file edits as a structured tool call. When unset, this falls back to the - /// model family's default preference. + /// model info's default preference. pub include_apply_patch_tool: bool, pub tools_web_search_request: bool, @@ -353,6 +352,10 @@ pub struct Config { /// or placeholder replacement will occur for fast keypress bursts. pub disable_paste_burst: bool, + /// When `false`, disables analytics across Codex product surfaces on this machine. + /// Defaults to `true`. + pub analytics: bool, + /// OTEL configuration (exporter type, endpoint, headers, etc.). pub otel: crate::config::types::OtelConfig, } @@ -813,6 +816,10 @@ pub struct ConfigToml { /// or placeholder replacement will occur for fast keypress bursts. pub disable_paste_burst: Option, + /// When `false`, disables analytics across Codex product surfaces on this machine. + /// Defaults to `true`. + pub analytics: Option, + /// OTEL configuration. 
pub otel: Option, @@ -1390,6 +1397,12 @@ impl Config { notices: cfg.notice.unwrap_or_default(), check_for_update_on_startup, disable_paste_burst: cfg.disable_paste_burst.unwrap_or(false), + analytics: config_profile + .analytics + .as_ref() + .and_then(|a| a.enabled) + .or(cfg.analytics.as_ref().and_then(|a| a.enabled)) + .unwrap_or(true), tui_notifications: cfg .tui .as_ref() @@ -1426,11 +1439,13 @@ impl Config { .unwrap_or(DEFAULT_OTEL_ENVIRONMENT.to_string()); let exporter = t.exporter.unwrap_or(OtelExporterKind::None); let trace_exporter = t.trace_exporter.unwrap_or_else(|| exporter.clone()); + let metrics_exporter = t.metrics_exporter.unwrap_or(OtelExporterKind::Statsig); OtelConfig { log_user_prompt, environment, exporter, trace_exporter, + metrics_exporter, } }, }; @@ -1869,7 +1884,7 @@ trust_level = "trusted" profiles.insert( "work".to_string(), ConfigProfile { - tools_view_image: Some(false), + tools_web_search: Some(false), ..Default::default() }, ); @@ -1885,7 +1900,7 @@ trust_level = "trusted" codex_home.path().to_path_buf(), )?; - assert!(!config.features.enabled(Feature::ViewImageTool)); + assert!(!config.features.enabled(Feature::WebSearchRequest)); Ok(()) } @@ -2050,6 +2065,7 @@ trust_level = "trusted" managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }; let cwd = AbsolutePathBuf::try_from(codex_home.path())?; @@ -2170,6 +2186,7 @@ trust_level = "trusted" managed_config_path: Some(managed_path), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }; let cwd = AbsolutePathBuf::try_from(codex_home.path())?; @@ -3037,6 +3054,9 @@ approval_policy = "untrusted" # `ConfigOverrides`. 
profile = "gpt3" +[analytics] +enabled = true + [model_providers.openai-chat-completions] name = "OpenAI using Chat Completions" base_url = "https://api.openai.com/v1" @@ -3062,6 +3082,9 @@ model = "o3" model_provider = "openai" approval_policy = "on-failure" +[profiles.zdr.analytics] +enabled = false + [profiles.gpt5] model = "gpt-5.1" model_provider = "openai" @@ -3202,6 +3225,7 @@ model_verbosity = "high" tui_notifications: Default::default(), animations: true, show_tooltips: true, + analytics: true, tui_scroll_events_per_tick: None, tui_scroll_wheel_lines: None, tui_scroll_trackpad_lines: None, @@ -3285,6 +3309,7 @@ model_verbosity = "high" tui_notifications: Default::default(), animations: true, show_tooltips: true, + analytics: true, tui_scroll_events_per_tick: None, tui_scroll_wheel_lines: None, tui_scroll_trackpad_lines: None, @@ -3383,6 +3408,7 @@ model_verbosity = "high" tui_notifications: Default::default(), animations: true, show_tooltips: true, + analytics: false, tui_scroll_events_per_tick: None, tui_scroll_wheel_lines: None, tui_scroll_trackpad_lines: None, @@ -3467,6 +3493,7 @@ model_verbosity = "high" tui_notifications: Default::default(), animations: true, show_tooltips: true, + analytics: true, tui_scroll_events_per_tick: None, tui_scroll_wheel_lines: None, tui_scroll_trackpad_lines: None, diff --git a/codex-rs/core/src/config/profile.rs b/codex-rs/core/src/config/profile.rs index 9007064905a..e1c45c1f169 100644 --- a/codex-rs/core/src/config/profile.rs +++ b/codex-rs/core/src/config/profile.rs @@ -29,6 +29,7 @@ pub struct ConfigProfile { pub experimental_use_freeform_apply_patch: Option, pub tools_web_search: Option, pub tools_view_image: Option, + pub analytics: Option, /// Optional feature toggles scoped to this profile. 
#[serde(default)] pub features: Option, diff --git a/codex-rs/core/src/config/service.rs b/codex-rs/core/src/config/service.rs index bc6d96bcb84..913c02df1d0 100644 --- a/codex-rs/core/src/config/service.rs +++ b/codex-rs/core/src/config/service.rs @@ -4,6 +4,7 @@ use crate::config::edit::ConfigEdit; use crate::config::edit::ConfigEditsBuilder; use crate::config_loader::ConfigLayerEntry; use crate::config_loader::ConfigLayerStack; +use crate::config_loader::ConfigRequirementsToml; use crate::config_loader::LoaderOverrides; use crate::config_loader::load_config_layers_state; use crate::config_loader::merge_toml_values; @@ -106,16 +107,7 @@ pub struct ConfigService { } impl ConfigService { - pub fn new(codex_home: PathBuf, cli_overrides: Vec<(String, TomlValue)>) -> Self { - Self { - codex_home, - cli_overrides, - loader_overrides: LoaderOverrides::default(), - } - } - - #[cfg(test)] - fn with_overrides( + pub fn new( codex_home: PathBuf, cli_overrides: Vec<(String, TomlValue)>, loader_overrides: LoaderOverrides, @@ -127,6 +119,14 @@ impl ConfigService { } } + pub fn new_with_defaults(codex_home: PathBuf) -> Self { + Self { + codex_home, + cli_overrides: Vec::new(), + loader_overrides: LoaderOverrides::default(), + } + } + pub async fn read( &self, params: ConfigReadParams, @@ -158,6 +158,22 @@ impl ConfigService { }) } + pub async fn read_requirements( + &self, + ) -> Result, ConfigServiceError> { + let layers = self + .load_thread_agnostic_config() + .await + .map_err(|err| ConfigServiceError::io("failed to read configuration layers", err))?; + + let requirements = layers.requirements_toml().clone(); + if requirements.is_empty() { + Ok(None) + } else { + Ok(Some(requirements)) + } + } + pub async fn write_value( &self, params: ConfigValueWriteParams, @@ -707,7 +723,7 @@ unified_exec = true "#; std::fs::write(tmp.path().join(CONFIG_TOML_FILE), original)?; - let service = ConfigService::new(tmp.path().to_path_buf(), vec![]); + let service = 
ConfigService::new_with_defaults(tmp.path().to_path_buf()); service .write_value(ConfigValueWriteParams { file_path: Some(tmp.path().join(CONFIG_TOML_FILE).display().to_string()), @@ -748,13 +764,14 @@ remote_compaction = true std::fs::write(&managed_path, "approval_policy = \"never\"").unwrap(); let managed_file = AbsolutePathBuf::try_from(managed_path.clone()).expect("managed file"); - let service = ConfigService::with_overrides( + let service = ConfigService::new( tmp.path().to_path_buf(), vec![], LoaderOverrides { managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }, ); @@ -828,13 +845,14 @@ remote_compaction = true std::fs::write(&managed_path, "approval_policy = \"never\"").unwrap(); let managed_file = AbsolutePathBuf::try_from(managed_path.clone()).expect("managed file"); - let service = ConfigService::with_overrides( + let service = ConfigService::new( tmp.path().to_path_buf(), vec![], LoaderOverrides { managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }, ); @@ -879,7 +897,7 @@ remote_compaction = true let user_path = tmp.path().join(CONFIG_TOML_FILE); std::fs::write(&user_path, "model = \"user\"").unwrap(); - let service = ConfigService::new(tmp.path().to_path_buf(), vec![]); + let service = ConfigService::new_with_defaults(tmp.path().to_path_buf()); let error = service .write_value(ConfigValueWriteParams { file_path: Some(tmp.path().join(CONFIG_TOML_FILE).display().to_string()), @@ -902,7 +920,7 @@ remote_compaction = true let tmp = tempdir().expect("tempdir"); std::fs::write(tmp.path().join(CONFIG_TOML_FILE), "").unwrap(); - let service = ConfigService::new(tmp.path().to_path_buf(), vec![]); + let service = ConfigService::new_with_defaults(tmp.path().to_path_buf()); service .write_value(ConfigValueWriteParams { file_path: None, @@ 
-930,13 +948,14 @@ remote_compaction = true let managed_path = tmp.path().join("managed_config.toml"); std::fs::write(&managed_path, "approval_policy = \"never\"").unwrap(); - let service = ConfigService::with_overrides( + let service = ConfigService::new( tmp.path().to_path_buf(), vec![], LoaderOverrides { managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }, ); @@ -977,13 +996,14 @@ remote_compaction = true TomlValue::String("session".to_string()), )]; - let service = ConfigService::with_overrides( + let service = ConfigService::new( tmp.path().to_path_buf(), cli_overrides, LoaderOverrides { managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }, ); @@ -1022,13 +1042,14 @@ remote_compaction = true std::fs::write(&managed_path, "approval_policy = \"never\"").unwrap(); let managed_file = AbsolutePathBuf::try_from(managed_path.clone()).expect("managed file"); - let service = ConfigService::with_overrides( + let service = ConfigService::new( tmp.path().to_path_buf(), vec![], LoaderOverrides { managed_config_path: Some(managed_path.clone()), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }, ); @@ -1080,7 +1101,7 @@ alpha = "a" std::fs::write(&path, base)?; - let service = ConfigService::new(tmp.path().to_path_buf(), vec![]); + let service = ConfigService::new_with_defaults(tmp.path().to_path_buf()); service .write_value(ConfigValueWriteParams { file_path: Some(path.display().to_string()), diff --git a/codex-rs/core/src/config/types.rs b/codex-rs/core/src/config/types.rs index 3aa72d5ce50..191b44c780a 100644 --- a/codex-rs/core/src/config/types.rs +++ b/codex-rs/core/src/config/types.rs @@ -273,6 +273,15 @@ pub enum HistoryPersistence { None, } +// ===== Analytics configuration ===== + +/// 
Analytics settings loaded from config.toml. Fields are optional so we can apply defaults. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)] +pub struct AnalyticsConfigToml { + /// When `false`, disables analytics across Codex product surfaces in this profile. + pub enabled: Option, +} + // ===== OTEL configuration ===== #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] @@ -297,6 +306,7 @@ pub struct OtelTlsConfig { #[serde(rename_all = "kebab-case")] pub enum OtelExporterKind { None, + Statsig, OtlpHttp { endpoint: String, #[serde(default)] @@ -328,6 +338,11 @@ pub struct OtelConfigToml { /// Optional trace exporter pub trace_exporter: Option, + + /// Optional metrics exporter + /// + /// Defaults to `statsig` outside of tests. + pub metrics_exporter: Option, } /// Effective OTEL settings after defaults are applied. @@ -337,6 +352,7 @@ pub struct OtelConfig { pub environment: String, pub exporter: OtelExporterKind, pub trace_exporter: OtelExporterKind, + pub metrics_exporter: OtelExporterKind, } impl Default for OtelConfig { @@ -346,6 +362,7 @@ impl Default for OtelConfig { environment: DEFAULT_OTEL_ENVIRONMENT.to_owned(), exporter: OtelExporterKind::None, trace_exporter: OtelExporterKind::None, + metrics_exporter: OtelExporterKind::Statsig, } } } diff --git a/codex-rs/core/src/config_loader/config_requirements.rs b/codex-rs/core/src/config_loader/config_requirements.rs index feb854df696..efbf9d61e51 100644 --- a/codex-rs/core/src/config_loader/config_requirements.rs +++ b/codex-rs/core/src/config_loader/config_requirements.rs @@ -58,6 +58,10 @@ impl From for SandboxModeRequirement { } impl ConfigRequirementsToml { + pub fn is_empty(&self) -> bool { + self.allowed_approval_policies.is_none() && self.allowed_sandbox_modes.is_none() + } + /// For every field in `other` that is `Some`, if the corresponding field in /// `self` is `None`, copy the value from `other` into `self`. 
pub fn merge_unset_fields(&mut self, mut other: ConfigRequirementsToml) { diff --git a/codex-rs/core/src/config_loader/layer_io.rs b/codex-rs/core/src/config_loader/layer_io.rs index d4312729680..0ece69b4710 100644 --- a/codex-rs/core/src/config_loader/layer_io.rs +++ b/codex-rs/core/src/config_loader/layer_io.rs @@ -33,11 +33,13 @@ pub(super) async fn load_config_layers_internal( let LoaderOverrides { managed_config_path, managed_preferences_base64, + .. } = overrides; #[cfg(not(target_os = "macos"))] let LoaderOverrides { managed_config_path, + .. } = overrides; let managed_config_path = AbsolutePathBuf::from_absolute_path( @@ -91,12 +93,8 @@ pub(super) async fn read_config_from_path( } } -/// Return the default managed config path (honoring `CODEX_MANAGED_CONFIG_PATH`). +/// Return the default managed config path. pub(super) fn managed_config_default_path(codex_home: &Path) -> PathBuf { - if let Ok(path) = std::env::var("CODEX_MANAGED_CONFIG_PATH") { - return PathBuf::from(path); - } - #[cfg(unix)] { let _ = codex_home; diff --git a/codex-rs/core/src/config_loader/macos.rs b/codex-rs/core/src/config_loader/macos.rs index 4a80267b907..8d2289e9158 100644 --- a/codex-rs/core/src/config_loader/macos.rs +++ b/codex-rs/core/src/config_loader/macos.rs @@ -1,3 +1,4 @@ +use super::config_requirements::ConfigRequirementsToml; use base64::Engine; use base64::prelude::BASE64_STANDARD; use core_foundation::base::TCFType; @@ -10,6 +11,7 @@ use toml::Value as TomlValue; const MANAGED_PREFERENCES_APPLICATION_ID: &str = "com.openai.codex"; const MANAGED_PREFERENCES_CONFIG_KEY: &str = "config_toml_base64"; +const MANAGED_PREFERENCES_REQUIREMENTS_KEY: &str = "requirements_toml_base64"; pub(crate) async fn load_managed_admin_config_layer( override_base64: Option<&str>, @@ -19,82 +21,126 @@ pub(crate) async fn load_managed_admin_config_layer( return if trimmed.is_empty() { Ok(None) } else { - parse_managed_preferences_base64(trimmed).map(Some) + 
parse_managed_config_base64(trimmed).map(Some) }; } - const LOAD_ERROR: &str = "Failed to load managed preferences configuration"; - match task::spawn_blocking(load_managed_admin_config).await { Ok(result) => result, Err(join_err) => { if join_err.is_cancelled() { - tracing::error!("Managed preferences load task was cancelled"); + tracing::error!("Managed config load task was cancelled"); } else { - tracing::error!("Managed preferences load task failed: {join_err}"); + tracing::error!("Managed config load task failed: {join_err}"); } - Err(io::Error::other(LOAD_ERROR)) + Err(io::Error::other("Failed to load managed config")) } } } fn load_managed_admin_config() -> io::Result> { + load_managed_preference(MANAGED_PREFERENCES_CONFIG_KEY)? + .as_deref() + .map(str::trim) + .map(parse_managed_config_base64) + .transpose() +} + +pub(crate) async fn load_managed_admin_requirements_toml( + target: &mut ConfigRequirementsToml, + override_base64: Option<&str>, +) -> io::Result<()> { + if let Some(encoded) = override_base64 { + let trimmed = encoded.trim(); + if !trimmed.is_empty() { + target.merge_unset_fields(parse_managed_requirements_base64(trimmed)?); + } + return Ok(()); + } + + match task::spawn_blocking(load_managed_admin_requirements).await { + Ok(result) => { + if let Some(requirements) = result? { + target.merge_unset_fields(requirements); + } + Ok(()) + } + Err(join_err) => { + if join_err.is_cancelled() { + tracing::error!("Managed requirements load task was cancelled"); + } else { + tracing::error!("Managed requirements load task failed: {join_err}"); + } + Err(io::Error::other("Failed to load managed requirements")) + } + } +} + +fn load_managed_admin_requirements() -> io::Result> { + load_managed_preference(MANAGED_PREFERENCES_REQUIREMENTS_KEY)? 
+ .as_deref() + .map(str::trim) + .map(parse_managed_requirements_base64) + .transpose() +} + +fn load_managed_preference(key_name: &str) -> io::Result> { #[link(name = "CoreFoundation", kind = "framework")] unsafe extern "C" { fn CFPreferencesCopyAppValue(key: CFStringRef, application_id: CFStringRef) -> *mut c_void; } - let application_id = CFString::new(MANAGED_PREFERENCES_APPLICATION_ID); - let key = CFString::new(MANAGED_PREFERENCES_CONFIG_KEY); - let value_ref = unsafe { CFPreferencesCopyAppValue( - key.as_concrete_TypeRef(), - application_id.as_concrete_TypeRef(), + CFString::new(key_name).as_concrete_TypeRef(), + CFString::new(MANAGED_PREFERENCES_APPLICATION_ID).as_concrete_TypeRef(), ) }; if value_ref.is_null() { tracing::debug!( - "Managed preferences for {} key {} not found", - MANAGED_PREFERENCES_APPLICATION_ID, - MANAGED_PREFERENCES_CONFIG_KEY + "Managed preferences for {MANAGED_PREFERENCES_APPLICATION_ID} key {key_name} not found", ); return Ok(None); } - let value = unsafe { CFString::wrap_under_create_rule(value_ref as _) }; - let contents = value.to_string(); - let trimmed = contents.trim(); - - parse_managed_preferences_base64(trimmed).map(Some) + let value = unsafe { CFString::wrap_under_create_rule(value_ref as _) }.to_string(); + Ok(Some(value)) } -fn parse_managed_preferences_base64(encoded: &str) -> io::Result { - let decoded = BASE64_STANDARD.decode(encoded.as_bytes()).map_err(|err| { - tracing::error!("Failed to decode managed preferences as base64: {err}"); - io::Error::new(io::ErrorKind::InvalidData, err) - })?; - - let decoded_str = String::from_utf8(decoded).map_err(|err| { - tracing::error!("Managed preferences base64 contents were not valid UTF-8: {err}"); - io::Error::new(io::ErrorKind::InvalidData, err) - })?; - - match toml::from_str::(&decoded_str) { +fn parse_managed_config_base64(encoded: &str) -> io::Result { + match toml::from_str::(&decode_managed_preferences_base64(encoded)?) 
{ Ok(TomlValue::Table(parsed)) => Ok(TomlValue::Table(parsed)), Ok(other) => { - tracing::error!( - "Managed preferences TOML must have a table at the root, found {other:?}", - ); + tracing::error!("Managed config TOML must have a table at the root, found {other:?}",); Err(io::Error::new( io::ErrorKind::InvalidData, - "managed preferences root must be a table", + "managed config root must be a table", )) } Err(err) => { - tracing::error!("Failed to parse managed preferences TOML: {err}"); + tracing::error!("Failed to parse managed config TOML: {err}"); Err(io::Error::new(io::ErrorKind::InvalidData, err)) } } } + +fn parse_managed_requirements_base64(encoded: &str) -> io::Result { + toml::from_str::(&decode_managed_preferences_base64(encoded)?).map_err( + |err| { + tracing::error!("Failed to parse managed requirements TOML: {err}"); + io::Error::new(io::ErrorKind::InvalidData, err) + }, + ) +} + +fn decode_managed_preferences_base64(encoded: &str) -> io::Result { + String::from_utf8(BASE64_STANDARD.decode(encoded.as_bytes()).map_err(|err| { + tracing::error!("Failed to decode managed value as base64: {err}",); + io::Error::new(io::ErrorKind::InvalidData, err) + })?) 
+ .map_err(|err| { + tracing::error!("Managed value base64 contents were not valid UTF-8: {err}",); + io::Error::new(io::ErrorKind::InvalidData, err) + }) +} diff --git a/codex-rs/core/src/config_loader/mod.rs b/codex-rs/core/src/config_loader/mod.rs index 73624c83c7a..bb995fda213 100644 --- a/codex-rs/core/src/config_loader/mod.rs +++ b/codex-rs/core/src/config_loader/mod.rs @@ -12,7 +12,6 @@ mod tests; use crate::config::CONFIG_TOML_FILE; use crate::config::ConfigToml; -use crate::config_loader::config_requirements::ConfigRequirementsToml; use crate::config_loader::layer_io::LoadedConfigLayers; use codex_app_server_protocol::ConfigLayerSource; use codex_protocol::config_types::SandboxMode; @@ -25,6 +24,8 @@ use std::path::Path; use toml::Value as TomlValue; pub use config_requirements::ConfigRequirements; +pub use config_requirements::ConfigRequirementsToml; +pub use config_requirements::SandboxModeRequirement; pub use merge::merge_toml_values; pub use state::ConfigLayerEntry; pub use state::ConfigLayerStack; @@ -78,8 +79,14 @@ pub async fn load_config_layers_state( ) -> io::Result { let mut config_requirements_toml = ConfigRequirementsToml::default(); - // TODO(gt): Support an entry in MDM for config requirements and use it - // with `config_requirements_toml.merge_unset_fields(...)`, if present. + #[cfg(target_os = "macos")] + macos::load_managed_admin_requirements_toml( + &mut config_requirements_toml, + overrides + .macos_managed_config_requirements_base64 + .as_deref(), + ) + .await?; // Honor /etc/codex/requirements.toml. if cfg!(unix) { @@ -101,8 +108,6 @@ pub async fn load_config_layers_state( let mut layers = Vec::::new(); - // TODO(gt): Honor managed preferences (macOS only). - // Include an entry for the "system" config folder, loading its config.toml, // if it exists. let system_config_toml_file = if cfg!(unix) { @@ -197,7 +202,9 @@ pub async fn load_config_layers_state( )); } - ConfigLayerStack::new(layers, config_requirements_toml.try_into()?) 
+ let requirements_toml = config_requirements_toml.clone(); + let requirements = config_requirements_toml.try_into()?; + ConfigLayerStack::new(layers, requirements, requirements_toml) } /// Attempts to load a config.toml file from `config_toml`. diff --git a/codex-rs/core/src/config_loader/state.rs b/codex-rs/core/src/config_loader/state.rs index efb33dfac5b..2b01a22644b 100644 --- a/codex-rs/core/src/config_loader/state.rs +++ b/codex-rs/core/src/config_loader/state.rs @@ -1,4 +1,5 @@ use crate::config_loader::ConfigRequirements; +use crate::config_loader::ConfigRequirementsToml; use super::fingerprint::record_origins; use super::fingerprint::version_for_toml; @@ -12,11 +13,14 @@ use std::collections::HashMap; use std::path::PathBuf; use toml::Value as TomlValue; +/// LoaderOverrides overrides managed configuration inputs (primarily for tests). #[derive(Debug, Default, Clone)] pub struct LoaderOverrides { pub managed_config_path: Option, + //TODO(gt): Add a macos_ prefix to this field and remove the target_os check. #[cfg(target_os = "macos")] pub managed_preferences_base64: Option, + pub macos_managed_config_requirements_base64: Option, } #[derive(Debug, Clone, PartialEq)] @@ -83,18 +87,25 @@ pub struct ConfigLayerStack { /// Constraints that must be enforced when deriving a [Config] from the /// layers. requirements: ConfigRequirements, + + /// Raw requirements data as loaded from requirements.toml/MDM/legacy + /// sources. This preserves the original allow-lists so they can be + /// surfaced via APIs. 
+ requirements_toml: ConfigRequirementsToml, } impl ConfigLayerStack { pub fn new( layers: Vec, requirements: ConfigRequirements, + requirements_toml: ConfigRequirementsToml, ) -> std::io::Result { let user_layer_index = verify_layer_ordering(&layers)?; Ok(Self { layers, user_layer_index, requirements, + requirements_toml, }) } @@ -108,6 +119,10 @@ impl ConfigLayerStack { &self.requirements } + pub fn requirements_toml(&self) -> &ConfigRequirementsToml { + &self.requirements_toml + } + /// Creates a new [ConfigLayerStack] using the specified values to inject a /// "user layer" into the stack. If such a layer already exists, it is /// replaced; otherwise, it is inserted into the stack at the appropriate @@ -128,6 +143,7 @@ impl ConfigLayerStack { layers, user_layer_index: self.user_layer_index, requirements: self.requirements.clone(), + requirements_toml: self.requirements_toml.clone(), } } None => { @@ -148,6 +164,7 @@ impl ConfigLayerStack { layers, user_layer_index: Some(user_layer_index), requirements: self.requirements.clone(), + requirements_toml: self.requirements_toml.clone(), } } } diff --git a/codex-rs/core/src/config_loader/tests.rs b/codex-rs/core/src/config_loader/tests.rs index bb8898129c9..b80f00c71c9 100644 --- a/codex-rs/core/src/config_loader/tests.rs +++ b/codex-rs/core/src/config_loader/tests.rs @@ -9,6 +9,8 @@ use crate::config_loader::config_requirements::ConfigRequirementsToml; use crate::config_loader::fingerprint::version_for_toml; use crate::config_loader::load_requirements_toml; use codex_protocol::protocol::AskForApproval; +#[cfg(target_os = "macos")] +use codex_protocol::protocol::SandboxPolicy; use codex_utils_absolute_path::AbsolutePathBuf; use pretty_assertions::assert_eq; use tempfile::tempdir; @@ -43,6 +45,7 @@ extra = true managed_config_path: Some(managed_path), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }; let cwd = 
AbsolutePathBuf::try_from(tmp.path()).expect("cwd"); @@ -73,10 +76,12 @@ extra = true async fn returns_empty_when_all_layers_missing() { let tmp = tempdir().expect("tempdir"); let managed_path = tmp.path().join("managed_config.toml"); + let overrides = LoaderOverrides { managed_config_path: Some(managed_path), #[cfg(target_os = "macos")] managed_preferences_base64: None, + macos_managed_config_requirements_base64: None, }; let cwd = AbsolutePathBuf::try_from(tmp.path()).expect("cwd"); @@ -141,12 +146,6 @@ async fn returns_empty_when_all_layers_missing() { async fn managed_preferences_take_highest_precedence() { use base64::Engine; - let managed_payload = r#" -[nested] -value = "managed" -flag = false -"#; - let encoded = base64::prelude::BASE64_STANDARD.encode(managed_payload.as_bytes()); let tmp = tempdir().expect("tempdir"); let managed_path = tmp.path().join("managed_config.toml"); @@ -168,7 +167,17 @@ flag = true let overrides = LoaderOverrides { managed_config_path: Some(managed_path), - managed_preferences_base64: Some(encoded), + managed_preferences_base64: Some( + base64::prelude::BASE64_STANDARD.encode( + r#" +[nested] +value = "managed" +flag = false +"# + .as_bytes(), + ), + ), + macos_managed_config_requirements_base64: None, }; let cwd = AbsolutePathBuf::try_from(tmp.path()).expect("cwd"); @@ -192,6 +201,108 @@ flag = true assert_eq!(nested.get("flag"), Some(&TomlValue::Boolean(false))); } +#[cfg(target_os = "macos")] +#[tokio::test] +async fn managed_preferences_requirements_are_applied() -> anyhow::Result<()> { + use base64::Engine; + + let tmp = tempdir()?; + + let state = load_config_layers_state( + tmp.path(), + Some(AbsolutePathBuf::try_from(tmp.path())?), + &[] as &[(String, TomlValue)], + LoaderOverrides { + managed_config_path: Some(tmp.path().join("managed_config.toml")), + managed_preferences_base64: Some(String::new()), + macos_managed_config_requirements_base64: Some( + base64::prelude::BASE64_STANDARD.encode( + r#" 
+allowed_approval_policies = ["never"] +allowed_sandbox_modes = ["read-only"] +"# + .as_bytes(), + ), + ), + }, + ) + .await?; + + assert_eq!( + state.requirements().approval_policy.value(), + AskForApproval::Never + ); + assert_eq!( + *state.requirements().sandbox_policy.get(), + SandboxPolicy::ReadOnly + ); + assert!( + state + .requirements() + .approval_policy + .can_set(&AskForApproval::OnRequest) + .is_err() + ); + assert!( + state + .requirements() + .sandbox_policy + .can_set(&SandboxPolicy::WorkspaceWrite { + writable_roots: Vec::new(), + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }) + .is_err() + ); + + Ok(()) +} + +#[cfg(target_os = "macos")] +#[tokio::test] +async fn managed_preferences_requirements_take_precedence() -> anyhow::Result<()> { + use base64::Engine; + + let tmp = tempdir()?; + let managed_path = tmp.path().join("managed_config.toml"); + + tokio::fs::write(&managed_path, "approval_policy = \"on-request\"\n").await?; + + let state = load_config_layers_state( + tmp.path(), + Some(AbsolutePathBuf::try_from(tmp.path())?), + &[] as &[(String, TomlValue)], + LoaderOverrides { + managed_config_path: Some(managed_path), + managed_preferences_base64: Some(String::new()), + macos_managed_config_requirements_base64: Some( + base64::prelude::BASE64_STANDARD.encode( + r#" +allowed_approval_policies = ["never"] +"# + .as_bytes(), + ), + ), + }, + ) + .await?; + + assert_eq!( + state.requirements().approval_policy.value(), + AskForApproval::Never + ); + assert!( + state + .requirements() + .approval_policy + .can_set(&AskForApproval::OnRequest) + .is_err() + ); + + Ok(()) +} + #[tokio::test(flavor = "current_thread")] async fn load_requirements_toml_produces_expected_constraints() -> anyhow::Result<()> { let tmp = tempdir()?; diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index c18ad7df8ec..8f16f731f49 100644 --- 
a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -5,6 +5,9 @@ use crate::truncate::approx_token_count; use crate::truncate::approx_tokens_from_byte_count; use crate::truncate::truncate_function_output_items_with_policy; use crate::truncate::truncate_text; +use crate::user_instructions::SkillInstructions; +use crate::user_instructions::UserInstructions; +use crate::user_shell_command::is_user_shell_command_text; use codex_protocol::models::ContentItem; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -13,7 +16,7 @@ use codex_protocol::protocol::TokenUsage; use codex_protocol::protocol::TokenUsageInfo; use std::ops::Deref; -/// Transcript of conversation history +/// Transcript of thread history #[derive(Debug, Clone, Default)] pub(crate) struct ContextManager { /// The oldest items are at the beginning of the vector. @@ -80,10 +83,9 @@ impl ContextManager { // Estimate token usage using byte-based heuristics from the truncation helpers. // This is a coarse lower bound, not a tokenizer-accurate count. pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option { - let model_family = turn_context.client.get_model_family(); - let base_tokens = - i64::try_from(approx_token_count(model_family.base_instructions.as_str())) - .unwrap_or(i64::MAX); + let model_info = turn_context.client.get_model_info(); + let base_instructions = model_info.base_instructions.as_str(); + let base_tokens = i64::try_from(approx_token_count(base_instructions)).unwrap_or(i64::MAX); let items_tokens = self.items.iter().fold(0i64, |acc, item| { acc + match item { @@ -152,6 +154,39 @@ impl ContextManager { } } + /// Drop the last `num_turns` user turns from this history. + /// + /// "User turns" are identified as `ResponseItem::Message` entries whose role is `"user"`. 
+ /// + /// This mirrors thread-rollback semantics: + /// - `num_turns == 0` is a no-op + /// - if there are no user turns, this is a no-op + /// - if `num_turns` exceeds the number of user turns, all user turns are dropped while + /// preserving any items that occurred before the first user message. + pub(crate) fn drop_last_n_user_turns(&mut self, num_turns: u32) { + if num_turns == 0 { + return; + } + + // Keep behavior consistent with call sites that previously operated on `get_history()`: + // normalize first (call/output invariants), then truncate based on the normalized view. + let snapshot = self.get_history(); + let user_positions = user_message_positions(&snapshot); + let Some(&first_user_idx) = user_positions.first() else { + self.replace(snapshot); + return; + }; + + let n_from_end = usize::try_from(num_turns).unwrap_or(usize::MAX); + let cut_idx = if n_from_end >= user_positions.len() { + first_user_idx + } else { + user_positions[user_positions.len() - n_from_end] + }; + + self.replace(snapshot[..cut_idx].to_vec()); + } + pub(crate) fn update_token_info( &mut self, usage: &TokenUsage, @@ -291,6 +326,56 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize { .saturating_sub(650) } +fn is_session_prefix(text: &str) -> bool { + let trimmed = text.trim_start(); + let lowered = trimmed.to_ascii_lowercase(); + lowered.starts_with("") +} + +fn is_user_turn_boundary(item: &ResponseItem) -> bool { + let ResponseItem::Message { role, content, .. 
} = item else { + return false; + }; + + if role != "user" { + return false; + } + + if UserInstructions::is_user_instructions(content) + || SkillInstructions::is_skill_instructions(content) + { + return false; + } + + for content_item in content { + match content_item { + ContentItem::InputText { text } => { + if is_session_prefix(text) || is_user_shell_command_text(text) { + return false; + } + } + ContentItem::OutputText { text } => { + if is_session_prefix(text) { + return false; + } + } + ContentItem::InputImage { .. } => {} + } + } + + true +} + +fn user_message_positions(items: &[ResponseItem]) -> Vec { + let mut positions = Vec::new(); + for (idx, item) in items.iter().enumerate() { + if is_user_turn_boundary(item) { + positions.push(idx); + } + } + positions +} + #[cfg(test)] #[path = "history_tests.rs"] mod tests; diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index d121b7dc634..a89020f7e34 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -43,6 +43,16 @@ fn user_msg(text: &str) -> ResponseItem { } } +fn user_input_text_msg(text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: text.to_string(), + }], + } +} + fn reasoning_msg(text: &str) -> ResponseItem { ResponseItem::Reasoning { id: String::new(), @@ -227,6 +237,127 @@ fn remove_first_item_handles_local_shell_pair() { assert_eq!(h.contents(), vec![]); } +#[test] +fn drop_last_n_user_turns_preserves_prefix() { + let items = vec![ + assistant_msg("session prefix item"), + user_msg("u1"), + assistant_msg("a1"), + user_msg("u2"), + assistant_msg("a2"), + ]; + + let mut history = create_history_with_items(items); + history.drop_last_n_user_turns(1); + assert_eq!( + history.get_history(), + vec![ + assistant_msg("session prefix item"), + user_msg("u1"), + assistant_msg("a1"), + 
] + ); + + let mut history = create_history_with_items(vec![ + assistant_msg("session prefix item"), + user_msg("u1"), + assistant_msg("a1"), + user_msg("u2"), + assistant_msg("a2"), + ]); + history.drop_last_n_user_turns(99); + assert_eq!( + history.get_history(), + vec![assistant_msg("session prefix item")] + ); +} + +#[test] +fn drop_last_n_user_turns_ignores_session_prefix_user_messages() { + let items = vec![ + user_input_text_msg("ctx"), + user_input_text_msg("do the thing"), + user_input_text_msg( + "# AGENTS.md instructions for test_directory\n\n\ntest_text\n", + ), + user_input_text_msg( + "\ndemo\nskills/demo/SKILL.md\nbody\n", + ), + user_input_text_msg("echo 42"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + user_input_text_msg("turn 2 user"), + assistant_msg("turn 2 assistant"), + ]; + + let mut history = create_history_with_items(items); + history.drop_last_n_user_turns(1); + + let expected_prefix_and_first_turn = vec![ + user_input_text_msg("ctx"), + user_input_text_msg("do the thing"), + user_input_text_msg( + "# AGENTS.md instructions for test_directory\n\n\ntest_text\n", + ), + user_input_text_msg( + "\ndemo\nskills/demo/SKILL.md\nbody\n", + ), + user_input_text_msg("echo 42"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + ]; + + assert_eq!(history.get_history(), expected_prefix_and_first_turn); + + let expected_prefix_only = vec![ + user_input_text_msg("ctx"), + user_input_text_msg("do the thing"), + user_input_text_msg( + "# AGENTS.md instructions for test_directory\n\n\ntest_text\n", + ), + user_input_text_msg( + "\ndemo\nskills/demo/SKILL.md\nbody\n", + ), + user_input_text_msg("echo 42"), + ]; + + let mut history = create_history_with_items(vec![ + user_input_text_msg("ctx"), + user_input_text_msg("do the thing"), + user_input_text_msg( + "# AGENTS.md instructions for test_directory\n\n\ntest_text\n", + ), + user_input_text_msg( + "\ndemo\nskills/demo/SKILL.md\nbody\n", + ), + 
user_input_text_msg("echo 42"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + user_input_text_msg("turn 2 user"), + assistant_msg("turn 2 assistant"), + ]); + history.drop_last_n_user_turns(2); + assert_eq!(history.get_history(), expected_prefix_only); + + let mut history = create_history_with_items(vec![ + user_input_text_msg("ctx"), + user_input_text_msg("do the thing"), + user_input_text_msg( + "# AGENTS.md instructions for test_directory\n\n\ntest_text\n", + ), + user_input_text_msg( + "\ndemo\nskills/demo/SKILL.md\nbody\n", + ), + user_input_text_msg("echo 42"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + user_input_text_msg("turn 2 user"), + assistant_msg("turn 2 assistant"), + ]); + history.drop_last_n_user_turns(3); + assert_eq!(history.get_history(), expected_prefix_only); +} + #[test] fn remove_first_item_handles_custom_tool_pair() { let items = vec![ @@ -462,7 +593,6 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { assert_truncated_message_matches(&truncated, "line-0-", 17_423); } -//TODO(aibrahim): run CI in release mode. #[cfg(not(debug_assertions))] #[test] fn normalize_adds_missing_output_for_function_call() { diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index e8fa91d26e8..c2bce1a2e72 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -8,7 +8,7 @@ use chrono::Datelike; use chrono::Local; use chrono::Utc; use codex_async_utils::CancelErr; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::RateLimitSnapshot; @@ -71,12 +71,12 @@ pub enum CodexErr { Stream(String, Option), #[error( - "Codex ran out of room in the model's context window. Start a new conversation or clear earlier history before retrying." + "Codex ran out of room in the model's context window. 
Start a new thread or clear earlier history before retrying." )] ContextWindowExceeded, - #[error("no conversation with id: {0}")] - ConversationNotFound(ConversationId), + #[error("no thread with id: {0}")] + ThreadNotFound(ThreadId), #[error("session configured event was not the first event in the stream")] SessionConfiguredNotFirstEvent, @@ -181,6 +181,43 @@ impl From for CodexErr { } } +impl CodexErr { + pub fn is_retryable(&self) -> bool { + match self { + CodexErr::TurnAborted + | CodexErr::Interrupted + | CodexErr::EnvVar(_) + | CodexErr::Fatal(_) + | CodexErr::UsageNotIncluded + | CodexErr::QuotaExceeded + | CodexErr::InvalidImageRequest() + | CodexErr::InvalidRequest(_) + | CodexErr::RefreshTokenFailed(_) + | CodexErr::UnsupportedOperation(_) + | CodexErr::Sandbox(_) + | CodexErr::LandlockSandboxExecutableNotProvided + | CodexErr::RetryLimit(_) + | CodexErr::ContextWindowExceeded + | CodexErr::ThreadNotFound(_) + | CodexErr::Spawn + | CodexErr::SessionConfiguredNotFirstEvent + | CodexErr::UsageLimitReached(_) => false, + CodexErr::Stream(..) 
+ | CodexErr::Timeout + | CodexErr::UnexpectedStatus(_) + | CodexErr::ResponseStreamFailed(_) + | CodexErr::ConnectionFailed(_) + | CodexErr::InternalServerError + | CodexErr::InternalAgentDied + | CodexErr::Io(_) + | CodexErr::Json(_) + | CodexErr::TokioJoin(_) => true, + #[cfg(target_os = "linux")] + CodexErr::LandlockRuleset(_) | CodexErr::LandlockPathFd(_) => false, + } + } +} + #[derive(Debug)] pub struct ConnectionFailedError { pub source: reqwest::Error, @@ -455,7 +492,7 @@ impl CodexErr { CodexErr::SessionConfiguredNotFirstEvent | CodexErr::InternalServerError | CodexErr::InternalAgentDied => CodexErrorInfo::InternalServerError, - CodexErr::UnsupportedOperation(_) | CodexErr::ConversationNotFound(_) => { + CodexErr::UnsupportedOperation(_) | CodexErr::ThreadNotFound(_) => { CodexErrorInfo::BadRequest } CodexErr::Sandbox(_) => CodexErrorInfo::SandboxError, diff --git a/codex-rs/core/src/exec_policy.rs b/codex-rs/core/src/exec_policy.rs index 5caf7d9b82d..d8880a9b3c4 100644 --- a/codex-rs/core/src/exec_policy.rs +++ b/codex-rs/core/src/exec_policy.rs @@ -28,11 +28,10 @@ use crate::features::Feature; use crate::features::Features; use crate::sandboxing::SandboxPermissions; use crate::tools::sandboxing::ExecApprovalRequirement; +use shlex::try_join as shlex_try_join; -const FORBIDDEN_REASON: &str = "execpolicy forbids this command"; const PROMPT_CONFLICT_REASON: &str = - "execpolicy requires approval for this command, but AskForApproval is set to Never"; -const PROMPT_REASON: &str = "execpolicy requires approval for this command"; + "approval required by policy, but AskForApproval is set to Never"; const RULES_DIR_NAME: &str = "rules"; const RULE_EXTENSION: &str = "rules"; const DEFAULT_POLICY_FILE: &str = "default.rules"; @@ -128,7 +127,7 @@ impl ExecPolicyManager { match evaluation.decision { Decision::Forbidden => ExecApprovalRequirement::Forbidden { - reason: FORBIDDEN_REASON.to_string(), + reason: derive_forbidden_reason(command, &evaluation), }, 
Decision::Prompt => { if matches!(approval_policy, AskForApproval::Never) { @@ -137,7 +136,7 @@ impl ExecPolicyManager { } } else { ExecApprovalRequirement::NeedsApproval { - reason: derive_prompt_reason(&evaluation), + reason: derive_prompt_reason(command, &evaluation), proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { try_derive_execpolicy_amendment_for_prompt_rules( &evaluation.matched_rules, @@ -299,15 +298,69 @@ fn try_derive_execpolicy_amendment_for_allow_rules( }) } -/// Only return PROMPT_REASON when an execpolicy rule drove the prompt decision. -fn derive_prompt_reason(evaluation: &Evaluation) -> Option { - evaluation.matched_rules.iter().find_map(|rule_match| { - if is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt { - Some(PROMPT_REASON.to_string()) - } else { - None +/// Only return a reason when a policy rule drove the prompt decision. +fn derive_prompt_reason(command_args: &[String], evaluation: &Evaluation) -> Option { + let command = render_shlex_command(command_args); + + let most_specific_prompt = evaluation + .matched_rules + .iter() + .filter_map(|rule_match| match rule_match { + RuleMatch::PrefixRuleMatch { + matched_prefix, + decision: Decision::Prompt, + justification, + .. + } => Some((matched_prefix.len(), justification.as_deref())), + _ => None, + }) + .max_by_key(|(matched_prefix_len, _)| *matched_prefix_len); + + match most_specific_prompt { + Some((_matched_prefix_len, Some(justification))) => { + Some(format!("`{command}` requires approval: {justification}")) + } + Some((_matched_prefix_len, None)) => { + Some(format!("`{command}` requires approval by policy")) + } + None => None, + } +} + +fn render_shlex_command(args: &[String]) -> String { + shlex_try_join(args.iter().map(String::as_str)).unwrap_or_else(|_| args.join(" ")) +} + +/// Derive a string explaining why the command was forbidden. 
If `justification` +/// is set by the user, this can contain instructions with recommended +/// alternatives, for example. +fn derive_forbidden_reason(command_args: &[String], evaluation: &Evaluation) -> String { + let command = render_shlex_command(command_args); + + let most_specific_forbidden = evaluation + .matched_rules + .iter() + .filter_map(|rule_match| match rule_match { + RuleMatch::PrefixRuleMatch { + matched_prefix, + decision: Decision::Forbidden, + justification, + .. + } => Some((matched_prefix, justification.as_deref())), + _ => None, + }) + .max_by_key(|(matched_prefix, _)| matched_prefix.len()); + + match most_specific_forbidden { + Some((_matched_prefix, Some(justification))) => { + format!("`{command}` rejected: {justification}") + } + Some((matched_prefix, None)) => { + let prefix = render_shlex_command(matched_prefix); + format!("`{command}` rejected: policy forbids commands starting with `{prefix}`") } - }) + None => format!("`{command}` rejected: blocked by policy"), + } } async fn collect_policy_files(dir: impl AsRef) -> Result, ExecPolicyError> { @@ -368,6 +421,7 @@ mod tests { use crate::config_loader::ConfigLayerEntry; use crate::config_loader::ConfigLayerStack; use crate::config_loader::ConfigRequirements; + use crate::config_loader::ConfigRequirementsToml; use crate::features::Feature; use crate::features::Features; use codex_app_server_protocol::ConfigLayerSource; @@ -388,7 +442,12 @@ mod tests { ConfigLayerSource::Project { dot_codex_folder }, TomlValue::Table(Default::default()), ); - ConfigLayerStack::new(vec![layer], ConfigRequirements::default()).expect("ConfigLayerStack") + ConfigLayerStack::new( + vec![layer], + ConfigRequirements::default(), + ConfigRequirementsToml::default(), + ) + .expect("ConfigLayerStack") } #[tokio::test] @@ -450,7 +509,8 @@ mod tests { decision: Decision::Forbidden, matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], - decision: Decision::Forbidden + decision: 
Decision::Forbidden, + justification: None, }], }, policy.check_multiple(command.iter(), &|_| Decision::Allow) @@ -519,7 +579,11 @@ mod tests { TomlValue::Table(Default::default()), ), ]; - let config_stack = ConfigLayerStack::new(layers, ConfigRequirements::default())?; + let config_stack = ConfigLayerStack::new( + layers, + ConfigRequirements::default(), + ConfigRequirementsToml::default(), + )?; let policy = load_exec_policy(&config_stack).await?; @@ -528,7 +592,8 @@ mod tests { decision: Decision::Forbidden, matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], - decision: Decision::Forbidden + decision: Decision::Forbidden, + justification: None, }], }, policy.check_multiple([vec!["rm".to_string()]].iter(), &|_| Decision::Allow) @@ -538,7 +603,8 @@ mod tests { decision: Decision::Prompt, matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["ls".to_string()], - decision: Decision::Prompt + decision: Decision::Prompt, + justification: None, }], }, policy.check_multiple([vec!["ls".to_string()]].iter(), &|_| Decision::Allow) @@ -560,7 +626,7 @@ prefix_rule(pattern=["rm"], decision="forbidden") let forbidden_script = vec![ "bash".to_string(), "-lc".to_string(), - "rm -rf /tmp".to_string(), + "rm -rf /some/important/folder".to_string(), ]; let manager = ExecPolicyManager::new(policy); @@ -577,7 +643,45 @@ prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, ExecApprovalRequirement::Forbidden { - reason: FORBIDDEN_REASON.to_string() + reason: "`bash -lc 'rm -rf /some/important/folder'` rejected: policy forbids commands starting with `rm`".to_string() + } + ); + } + + #[tokio::test] + async fn justification_is_included_in_forbidden_exec_approval_requirement() { + let policy_src = r#" +prefix_rule( + pattern=["rm"], + decision="forbidden", + justification="destructive command", +) +"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.rules", policy_src) + .expect("parse policy"); + 
let policy = Arc::new(parser.build()); + + let manager = ExecPolicyManager::new(policy); + let requirement = manager + .create_exec_approval_requirement_for_command( + &Features::with_defaults(), + &[ + "rm".to_string(), + "-rf".to_string(), + "/some/important/folder".to_string(), + ], + AskForApproval::OnRequest, + &SandboxPolicy::DangerFullAccess, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::Forbidden { + reason: "`rm -rf /some/important/folder` rejected: destructive command".to_string() } ); } @@ -606,7 +710,7 @@ prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, ExecApprovalRequirement::NeedsApproval { - reason: Some(PROMPT_REASON.to_string()), + reason: Some("`rm` requires approval by policy".to_string()), proposed_execpolicy_amendment: None, } ); @@ -824,7 +928,7 @@ prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, ExecApprovalRequirement::NeedsApproval { - reason: Some(PROMPT_REASON.to_string()), + reason: Some("`rm` requires approval by policy".to_string()), proposed_execpolicy_amendment: None, } ); diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 3b22bfc3f45..cfa5a0acc61 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -7,6 +7,7 @@ use crate::config::ConfigToml; use crate::config::profile::ConfigProfile; +use codex_otel::OtelManager; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; @@ -60,10 +61,6 @@ pub enum Feature { // Stable. /// Create a ghost commit at each turn. GhostCommit, - /// Include the view_image tool. - ViewImageTool, - /// Send warnings to the model to correct it on the tool usage. - ModelWarnings, /// Enable the default shell tool. ShellTool, @@ -72,8 +69,11 @@ pub enum Feature { UnifiedExec, /// Include the freeform apply_patch tool. ApplyPatchFreeform, - /// Allow the model to request web searches. 
+ /// Allow the model to request web searches that fetch live content. WebSearchRequest, + /// Allow the model to request web searches that fetch cached content. + /// Takes precedence over `WebSearchRequest`. + WebSearchCached, /// Gate the execpolicy enforcement for shell/unified exec. ExecPolicy, /// Enable Windows sandbox (restricted token) on Windows. @@ -84,16 +84,14 @@ pub enum Feature { RemoteCompaction, /// Refresh remote models and emit AppReady once the list is available. RemoteModels, - /// Allow model to call multiple tools in parallel (only for models supporting it). - ParallelToolCalls, /// Experimental shell snapshotting. ShellSnapshot, /// Experimental TUI v2 (viewport) implementation. Tui2, - /// Enable discovery and injection of skills. - Skills, /// Enforce UTF8 output in Powershell. PowershellUtf8, + /// Compress request bodies (zstd) when sending streaming requests to codex-backend. + EnableRequestCompression, } impl Feature { @@ -196,6 +194,21 @@ impl Features { .map(|usage| (usage.alias.as_str(), usage.feature)) } + pub fn emit_metrics(&self, otel: &OtelManager) { + for feature in FEATURES { + if self.enabled(feature.id) != feature.default_enabled { + otel.counter( + "codex.feature.state", + 1, + &[ + ("feature", feature.key), + ("value", &self.enabled(feature.id).to_string()), + ], + ); + } + } + } + /// Apply a table of key -> bool toggles (e.g. from TOML). 
pub fn apply_map(&mut self, m: &BTreeMap) { for (k, v) in m { @@ -228,7 +241,6 @@ impl Features { experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch, experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool, tools_web_search: cfg.tools.as_ref().and_then(|t| t.web_search), - tools_view_image: cfg.tools.as_ref().and_then(|t| t.view_image), ..Default::default() }; base_legacy.apply(&mut features); @@ -244,7 +256,6 @@ impl Features { experimental_use_unified_exec_tool: config_profile.experimental_use_unified_exec_tool, tools_web_search: config_profile.tools_web_search, - tools_view_image: config_profile.tools_view_image, }; profile_legacy.apply(&mut features); if let Some(profile_features) = config_profile.features.as_ref() { @@ -300,36 +311,24 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Stable, default_enabled: false, }, - FeatureSpec { - id: Feature::ParallelToolCalls, - key: "parallel", - stage: Stage::Stable, - default_enabled: true, - }, - FeatureSpec { - id: Feature::ViewImageTool, - key: "view_image_tool", - stage: Stage::Stable, - default_enabled: true, - }, FeatureSpec { id: Feature::ShellTool, key: "shell_tool", stage: Stage::Stable, default_enabled: true, }, - FeatureSpec { - id: Feature::ModelWarnings, - key: "warnings", - stage: Stage::Stable, - default_enabled: true, - }, FeatureSpec { id: Feature::WebSearchRequest, key: "web_search_request", stage: Stage::Stable, default_enabled: false, }, + FeatureSpec { + id: Feature::WebSearchCached, + key: "web_search_cached", + stage: Stage::Experimental, + default_enabled: false, + }, // Beta program. Rendered in the `/experimental` menu for users. FeatureSpec { id: Feature::UnifiedExec, @@ -337,7 +336,7 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Beta { name: "Background terminal", menu_description: "Run long-running terminal commands in the background.", - announcement: "NEW! Try Background terminals for long running processes. 
Enable in /experimental!", + announcement: "NEW! Try Background terminals for long-running commands. Enable in /experimental!", }, default_enabled: false, }, @@ -388,14 +387,14 @@ pub const FEATURES: &[FeatureSpec] = &[ default_enabled: false, }, FeatureSpec { - id: Feature::Skills, - key: "skills", + id: Feature::PowershellUtf8, + key: "powershell_utf8", stage: Stage::Experimental, - default_enabled: true, + default_enabled: false, }, FeatureSpec { - id: Feature::PowershellUtf8, - key: "powershell_utf8", + id: Feature::EnableRequestCompression, + key: "enable_request_compression", stage: Stage::Experimental, default_enabled: false, }, diff --git a/codex-rs/core/src/features/legacy.rs b/codex-rs/core/src/features/legacy.rs index 09a982569f6..ed508ffb5a7 100644 --- a/codex-rs/core/src/features/legacy.rs +++ b/codex-rs/core/src/features/legacy.rs @@ -47,7 +47,6 @@ pub struct LegacyFeatureToggles { pub experimental_use_freeform_apply_patch: Option, pub experimental_use_unified_exec_tool: Option, pub tools_web_search: Option, - pub tools_view_image: Option, } impl LegacyFeatureToggles { @@ -76,12 +75,6 @@ impl LegacyFeatureToggles { self.tools_web_search, "tools.web_search", ); - set_if_some( - features, - Feature::ViewImageTool, - self.tools_view_image, - "tools.view_image", - ); } } diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 87944840835..370c1ecb97e 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -12,9 +12,10 @@ pub mod bash; mod client; mod client_common; pub mod codex; -mod codex_conversation; +mod codex_thread; mod compact_remote; -pub use codex_conversation::CodexConversation; +pub use codex_thread::CodexThread; +mod agent; mod codex_delegate; mod command_safety; pub mod config; @@ -59,13 +60,19 @@ pub use model_provider_info::OLLAMA_OSS_PROVIDER_ID; pub use model_provider_info::WireApi; pub use model_provider_info::built_in_model_providers; pub use model_provider_info::create_oss_provider_with_base_url; -mod 
conversation_manager; mod event_mapping; pub mod review_format; pub mod review_prompts; +mod thread_manager; pub use codex_protocol::protocol::InitialHistory; -pub use conversation_manager::ConversationManager; -pub use conversation_manager::NewConversation; +pub use thread_manager::NewThread; +pub use thread_manager::ThreadManager; +#[deprecated(note = "use ThreadManager")] +pub type ConversationManager = ThreadManager; +#[deprecated(note = "use NewThread")] +pub type NewConversation = NewThread; +#[deprecated(note = "use CodexThread")] +pub type CodexConversation = CodexThread; // Re-export common auth types for workspace consumers pub use auth::AuthManager; pub use auth::CodexAuth; @@ -86,10 +93,12 @@ pub use rollout::INTERACTIVE_SESSION_SOURCES; pub use rollout::RolloutRecorder; pub use rollout::SESSIONS_SUBDIR; pub use rollout::SessionMeta; +#[deprecated(note = "use find_thread_path_by_id_str")] pub use rollout::find_conversation_path_by_id_str; -pub use rollout::list::ConversationItem; -pub use rollout::list::ConversationsPage; +pub use rollout::find_thread_path_by_id_str; pub use rollout::list::Cursor; +pub use rollout::list::ThreadItem; +pub use rollout::list::ThreadsPage; pub use rollout::list::parse_cursor; pub use rollout::list::read_head_for_summary; mod function_tool; diff --git a/codex-rs/core/src/mcp_connection_manager.rs b/codex-rs/core/src/mcp_connection_manager.rs index 6c0b48b1bd5..dcd1edf80c8 100644 --- a/codex-rs/core/src/mcp_connection_manager.rs +++ b/codex-rs/core/src/mcp_connection_manager.rs @@ -79,26 +79,60 @@ pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10); /// Default timeout for individual tool calls. const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60); +/// The Responses API requires tool names to match `^[a-zA-Z0-9_-]+$`. +/// MCP server/tool names are user-controlled, so sanitize the fully-qualified +/// name we expose to the model by replacing any disallowed character with `_`. 
+fn sanitize_responses_api_tool_name(name: &str) -> String { + let mut sanitized = String::with_capacity(name.len()); + for c in name.chars() { + if c.is_ascii_alphanumeric() || c == '_' || c == '-' { + sanitized.push(c); + } else { + sanitized.push('_'); + } + } + + if sanitized.is_empty() { + "_".to_string() + } else { + sanitized + } +} + +fn sha1_hex(s: &str) -> String { + let mut hasher = Sha1::new(); + hasher.update(s.as_bytes()); + let sha1 = hasher.finalize(); + format!("{sha1:x}") +} + fn qualify_tools(tools: I) -> HashMap where I: IntoIterator, { let mut used_names = HashSet::new(); + let mut seen_raw_names = HashSet::new(); let mut qualified_tools = HashMap::new(); for tool in tools { - let mut qualified_name = format!( + let qualified_name_raw = format!( "mcp{}{}{}{}", MCP_TOOL_NAME_DELIMITER, tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name ); - if qualified_name.len() > MAX_TOOL_NAME_LENGTH { - let mut hasher = Sha1::new(); - hasher.update(qualified_name.as_bytes()); - let sha1 = hasher.finalize(); - let sha1_str = format!("{sha1:x}"); + if !seen_raw_names.insert(qualified_name_raw.clone()) { + warn!("skipping duplicated tool {}", qualified_name_raw); + continue; + } - // Truncate to make room for the hash suffix - let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len(); + // Start from a "pretty" name (sanitized), then deterministically disambiguate on + // collisions by appending a hash of the *raw* (unsanitized) qualified name. This + // ensures tools like `foo.bar` and `foo_bar` don't collapse to the same key. + let mut qualified_name = sanitize_responses_api_tool_name(&qualified_name_raw); + // Enforce length constraints early; use the raw name for the hash input so the + // output remains stable even when sanitization changes. 
+ if qualified_name.len() > MAX_TOOL_NAME_LENGTH { + let sha1_str = sha1_hex(&qualified_name_raw); + let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len(); qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str); } @@ -1035,6 +1069,28 @@ mod tests { ); } + #[test] + fn test_qualify_tools_sanitizes_invalid_characters() { + let tools = vec![create_test_tool("server.one", "tool.two")]; + + let qualified_tools = qualify_tools(tools); + + assert_eq!(qualified_tools.len(), 1); + let (qualified_name, tool) = qualified_tools.into_iter().next().expect("one tool"); + assert_eq!(qualified_name, "mcp__server_one__tool_two"); + + // The key is sanitized for OpenAI, but we keep original parts for the actual MCP call. + assert_eq!(tool.server_name, "server.one"); + assert_eq!(tool.tool_name, "tool.two"); + + assert!( + qualified_name + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-'), + "qualified name must be Responses API compatible: {qualified_name:?}" + ); + } + #[test] fn tool_filter_allows_by_default() { let filter = ToolFilter::default(); diff --git a/codex-rs/core/src/message_history.rs b/codex-rs/core/src/message_history.rs index 733e8e80089..cb3b10098c2 100644 --- a/codex-rs/core/src/message_history.rs +++ b/codex-rs/core/src/message_history.rs @@ -13,6 +13,8 @@ //! trailing `\n`) and write it with a **single `write(2)` system call** while //! the file descriptor is opened with the `O_APPEND` flag. POSIX guarantees //! that writes up to `PIPE_BUF` bytes are atomic in that case. +//! Note: `conversation_id` stores the thread id; the field name is preserved for +//! backwards compatibility with existing history files. 
use std::fs::File; use std::fs::OpenOptions; @@ -36,7 +38,7 @@ use tokio::io::AsyncReadExt; use crate::config::Config; use crate::config::types::HistoryPersistence; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; #[cfg(unix)] use std::os::unix::fs::OpenOptionsExt; #[cfg(unix)] @@ -69,7 +71,7 @@ fn history_filepath(config: &Config) -> PathBuf { /// which entails a small amount of blocking I/O internally. pub(crate) async fn append_entry( text: &str, - conversation_id: &ConversationId, + conversation_id: &ThreadId, config: &Config, ) -> Result<()> { match config.history.persistence { @@ -402,7 +404,7 @@ fn history_log_id(_metadata: &std::fs::Metadata) -> Option { mod tests { use super::*; use crate::config::ConfigBuilder; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; use pretty_assertions::assert_eq; use std::fs::File; use std::io::Write; @@ -497,7 +499,7 @@ mod tests { .await .expect("load config"); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let entry_one = "a".repeat(200); let entry_two = "b".repeat(200); @@ -544,7 +546,7 @@ mod tests { .await .expect("load config"); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let short_entry = "a".repeat(200); let long_entry = "b".repeat(400); diff --git a/codex-rs/core/src/models_manager/manager.rs b/codex-rs/core/src/models_manager/manager.rs index 060f4a5c278..87ff1b76d7a 100644 --- a/codex-rs/core/src/models_manager/manager.rs +++ b/codex-rs/core/src/models_manager/manager.rs @@ -24,7 +24,7 @@ use crate::default_client::build_reqwest_client; use crate::error::Result as CoreResult; use crate::features::Feature; use crate::model_provider_info::ModelProviderInfo; -use crate::models_manager::model_family::ModelFamily; +use crate::models_manager::model_info; use crate::models_manager::model_presets::builtin_model_presets; const MODEL_CACHE_FILE: &str = "models_cache.json"; @@ -36,7 +36,6 @@ const 
CODEX_AUTO_BALANCED_MODEL: &str = "codex-auto-balanced"; /// Coordinates remote model discovery plus cached metadata on disk. #[derive(Debug)] pub struct ModelsManager { - // todo(aibrahim) merge available_models and model family creation into one struct local_models: Vec, remote_models: RwLock>, auth_manager: Arc, @@ -48,8 +47,7 @@ pub struct ModelsManager { impl ModelsManager { /// Construct a manager scoped to the provided `AuthManager`. - pub fn new(auth_manager: Arc) -> Self { - let codex_home = auth_manager.codex_home().to_path_buf(); + pub fn new(codex_home: PathBuf, auth_manager: Arc) -> Self { Self { local_models: builtin_model_presets(auth_manager.get_auth_mode()), remote_models: RwLock::new(Self::load_remote_models_from_file().unwrap_or_default()), @@ -63,8 +61,11 @@ impl ModelsManager { #[cfg(any(test, feature = "test-support"))] /// Construct a manager scoped to the provided `AuthManager` with a specific provider. Used for integration tests. - pub fn with_provider(auth_manager: Arc, provider: ModelProviderInfo) -> Self { - let codex_home = auth_manager.codex_home().to_path_buf(); + pub fn with_provider( + codex_home: PathBuf, + auth_manager: Arc, + provider: ModelProviderInfo, + ) -> Self { Self { local_models: builtin_model_presets(auth_manager.get_auth_mode()), remote_models: RwLock::new(Self::load_remote_models_from_file().unwrap_or_default()), @@ -128,15 +129,19 @@ impl ModelsManager { Ok(self.build_available_models(remote_models)) } - fn find_family_for_model(slug: &str) -> ModelFamily { - super::model_family::find_family_for_model(slug) - } - - /// Look up the requested model family while applying remote metadata overrides. - pub async fn construct_model_family(&self, model: &str, config: &Config) -> ModelFamily { - Self::find_family_for_model(model) - .with_remote_overrides(self.remote_models(config).await) - .with_config_overrides(config) + /// Look up the requested model metadata while applying remote metadata overrides. 
+ pub async fn construct_model_info(&self, model: &str, config: &Config) -> ModelInfo { + let remote = self + .remote_models(config) + .await + .into_iter() + .find(|m| m.slug == model); + let model = if let Some(remote) = remote { + remote + } else { + model_info::find_model_info_for_slug(model) + }; + model_info::with_config_overrides(model, config) } pub async fn get_model(&self, model: &Option, config: &Config) -> String { @@ -149,14 +154,14 @@ impl ModelsManager { // if codex-auto-balanced exists & signed in with chatgpt mode, return it, otherwise return the default model let auth_mode = self.auth_manager.get_auth_mode(); let remote_models = self.remote_models(config).await; - if auth_mode == Some(AuthMode::ChatGPT) - && self + if auth_mode == Some(AuthMode::ChatGPT) { + let has_auto_balanced = self .build_available_models(remote_models) .iter() - .any(|m| m.model == CODEX_AUTO_BALANCED_MODEL) - { - return CODEX_AUTO_BALANCED_MODEL.to_string(); - } else if auth_mode == Some(AuthMode::ChatGPT) { + .any(|model| model.model == CODEX_AUTO_BALANCED_MODEL && model.show_in_picker); + if has_auto_balanced { + return CODEX_AUTO_BALANCED_MODEL.to_string(); + } return OPENAI_DEFAULT_CHATGPT_MODEL.to_string(); } OPENAI_DEFAULT_API_MODEL.to_string() @@ -180,9 +185,9 @@ impl ModelsManager { } #[cfg(any(test, feature = "test-support"))] - /// Offline helper that builds a `ModelFamily` without consulting remote state. - pub fn construct_model_family_offline(model: &str, config: &Config) -> ModelFamily { - Self::find_family_for_model(model).with_config_overrides(config) + /// Offline helper that builds a `ModelInfo` without consulting remote state. 
+ pub fn construct_model_info_offline(model: &str, config: &Config) -> ModelInfo { + model_info::with_config_overrides(model_info::find_model_info_for_slug(model), config) } async fn get_etag(&self) -> Option { @@ -247,10 +252,15 @@ impl ModelsManager { merged_presets = self.filter_visible_models(merged_presets); let has_default = merged_presets.iter().any(|preset| preset.is_default); - if let Some(default) = merged_presets.first_mut() - && !has_default - { - default.is_default = true; + if !has_default { + if let Some(default) = merged_presets + .iter_mut() + .find(|preset| preset.show_in_picker) + { + default.is_default = true; + } else if let Some(default) = merged_presets.first_mut() { + default.is_default = true; + } } merged_presets @@ -260,7 +270,7 @@ impl ModelsManager { let chatgpt_mode = self.auth_manager.get_auth_mode() == Some(AuthMode::ChatGPT); models .into_iter() - .filter(|model| model.show_in_picker && (chatgpt_mode || model.supported_in_api)) + .filter(|model| chatgpt_mode || model.supported_in_api) .collect() } @@ -358,14 +368,14 @@ mod tests { "supported_in_api": true, "priority": priority, "upgrade": null, - "base_instructions": null, + "base_instructions": "base instructions", "supports_reasoning_summaries": false, "support_verbosity": false, "default_verbosity": null, "apply_patch_tool_type": null, "truncation_policy": {"mode": "bytes", "limit": 10_000}, "supports_parallel_tool_calls": false, - "context_window": null, + "context_window": 272_000, "experimental_supported_tools": [], })) .expect("valid model") @@ -414,7 +424,8 @@ mod tests { let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::create_dummy_chatgpt_auth_for_testing()); let provider = provider_for(server.uri()); - let manager = ModelsManager::with_provider(auth_manager, provider); + let manager = + ModelsManager::with_provider(codex_home.path().to_path_buf(), auth_manager, provider); manager .refresh_available_models_with_cache(&config) @@ -473,7 +484,8 @@ mod tests { 
AuthCredentialsStoreMode::File, )); let provider = provider_for(server.uri()); - let manager = ModelsManager::with_provider(auth_manager, provider); + let manager = + ModelsManager::with_provider(codex_home.path().to_path_buf(), auth_manager, provider); manager .refresh_available_models_with_cache(&config) @@ -527,7 +539,8 @@ mod tests { AuthCredentialsStoreMode::File, )); let provider = provider_for(server.uri()); - let manager = ModelsManager::with_provider(auth_manager, provider); + let manager = + ModelsManager::with_provider(codex_home.path().to_path_buf(), auth_manager, provider); manager .refresh_available_models_with_cache(&config) @@ -597,7 +610,8 @@ mod tests { let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::create_dummy_chatgpt_auth_for_testing()); let provider = provider_for(server.uri()); - let mut manager = ModelsManager::with_provider(auth_manager, provider); + let mut manager = + ModelsManager::with_provider(codex_home.path().to_path_buf(), auth_manager, provider); manager.cache_ttl = Duration::ZERO; manager @@ -645,21 +659,24 @@ mod tests { #[test] fn build_available_models_picks_default_after_hiding_hidden_models() { + let codex_home = tempdir().expect("temp dir"); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let provider = provider_for("http://example.test".to_string()); - let mut manager = ModelsManager::with_provider(auth_manager, provider); + let mut manager = + ModelsManager::with_provider(codex_home.path().to_path_buf(), auth_manager, provider); manager.local_models = Vec::new(); let hidden_model = remote_model_with_visibility("hidden", "Hidden", 0, "hide"); let visible_model = remote_model_with_visibility("visible", "Visible", 1, "list"); - let mut expected = ModelPreset::from(visible_model.clone()); - expected.is_default = true; + let expected_hidden = ModelPreset::from(hidden_model.clone()); + let mut expected_visible = ModelPreset::from(visible_model.clone()); + 
expected_visible.is_default = true; let available = manager.build_available_models(vec![hidden_model, visible_model]); - assert_eq!(available, vec![expected]); + assert_eq!(available, vec![expected_hidden, expected_visible]); } #[test] diff --git a/codex-rs/core/src/models_manager/mod.rs b/codex-rs/core/src/models_manager/mod.rs index 83ed30e8724..d0e3c8214a5 100644 --- a/codex-rs/core/src/models_manager/mod.rs +++ b/codex-rs/core/src/models_manager/mod.rs @@ -1,4 +1,4 @@ pub mod cache; pub mod manager; -pub mod model_family; +pub mod model_info; pub mod model_presets; diff --git a/codex-rs/core/src/models_manager/model_family.rs b/codex-rs/core/src/models_manager/model_family.rs deleted file mode 100644 index 9a6904cea1e..00000000000 --- a/codex-rs/core/src/models_manager/model_family.rs +++ /dev/null @@ -1,557 +0,0 @@ -use codex_protocol::config_types::Verbosity; -use codex_protocol::openai_models::ApplyPatchToolType; -use codex_protocol::openai_models::ConfigShellToolType; -use codex_protocol::openai_models::ModelInfo; -use codex_protocol::openai_models::ReasoningEffort; - -use crate::config::Config; -use crate::truncate::TruncationPolicy; - -/// The `instructions` field in the payload sent to a model should always start -/// with this content. -const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md"); - -const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt_5_codex_prompt.md"); -const GPT_5_1_INSTRUCTIONS: &str = include_str!("../../gpt_5_1_prompt.md"); -const GPT_5_2_INSTRUCTIONS: &str = include_str!("../../gpt_5_2_prompt.md"); -const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../../gpt-5.1-codex-max_prompt.md"); -const GPT_5_2_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt-5.2-codex_prompt.md"); -pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000; - -/// A model family is a group of models that share certain characteristics. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ModelFamily { - /// The full model slug used to derive this model family, e.g. - /// "gpt-4.1-2025-04-14". - pub slug: String, - - /// The model family name, e.g. "gpt-4.1". This string is used when deriving - /// default metadata for the family, such as context windows. - pub family: String, - - /// True if the model needs additional instructions on how to use the - /// "virtual" `apply_patch` CLI. - pub needs_special_apply_patch_instructions: bool, - - /// Maximum supported context window, if known. - pub context_window: Option, - - /// Token threshold for automatic compaction if config does not override it. - auto_compact_token_limit: Option, - - // Whether the `reasoning` field can be set when making a request to this - // model family. Note it has `effort` and `summary` subfields (though - // `summary` is optional). - pub supports_reasoning_summaries: bool, - - // The reasoning effort to use for this model family when none is explicitly chosen. - pub default_reasoning_effort: Option, - - /// Whether this model supports parallel tool calls when using the - /// Responses API. - pub supports_parallel_tool_calls: bool, - - /// Present if the model performs better when `apply_patch` is provided as - /// a tool call instead of just a bash command - pub apply_patch_tool_type: Option, - - // Instructions to use for querying the model - pub base_instructions: String, - - /// Names of beta tools that should be exposed to this model family. - pub experimental_supported_tools: Vec, - - /// Percentage of the context window considered usable for inputs, after - /// reserving headroom for system prompts, tool overhead, and model output. - /// This is applied when computing the effective context window seen by - /// consumers. - pub effective_context_window_percent: i64, - - /// If the model family supports setting the verbosity level when using Responses API. 
- pub support_verbosity: bool, - - // The default verbosity level for this model family when using Responses API. - pub default_verbosity: Option, - - /// Preferred shell tool type for this model family when features do not override it. - pub shell_type: ConfigShellToolType, - - pub truncation_policy: TruncationPolicy, -} - -impl ModelFamily { - pub(super) fn with_config_overrides(mut self, config: &Config) -> Self { - if let Some(supports_reasoning_summaries) = config.model_supports_reasoning_summaries { - self.supports_reasoning_summaries = supports_reasoning_summaries; - } - if let Some(context_window) = config.model_context_window { - self.context_window = Some(context_window); - } - if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit { - self.auto_compact_token_limit = Some(auto_compact_token_limit); - } - self - } - pub(super) fn with_remote_overrides(mut self, remote_models: Vec) -> Self { - for model in remote_models { - if model.slug == self.slug { - self.apply_remote_overrides(model); - } - } - self - } - - fn apply_remote_overrides(&mut self, model: ModelInfo) { - let ModelInfo { - slug: _, - display_name: _, - description: _, - default_reasoning_level, - supported_reasoning_levels: _, - shell_type, - visibility: _, - supported_in_api: _, - priority: _, - upgrade: _, - base_instructions, - supports_reasoning_summaries, - support_verbosity, - default_verbosity, - apply_patch_tool_type, - truncation_policy, - supports_parallel_tool_calls, - context_window, - experimental_supported_tools, - } = model; - - self.default_reasoning_effort = Some(default_reasoning_level); - self.shell_type = shell_type; - if let Some(base) = base_instructions { - self.base_instructions = base; - } - self.supports_reasoning_summaries = supports_reasoning_summaries; - self.support_verbosity = support_verbosity; - self.default_verbosity = default_verbosity; - self.apply_patch_tool_type = apply_patch_tool_type; - self.truncation_policy = 
truncation_policy.into(); - self.supports_parallel_tool_calls = supports_parallel_tool_calls; - self.context_window = context_window; - self.experimental_supported_tools = experimental_supported_tools; - } - - pub fn auto_compact_token_limit(&self) -> Option { - self.auto_compact_token_limit - .or(self.context_window.map(Self::default_auto_compact_limit)) - } - - const fn default_auto_compact_limit(context_window: i64) -> i64 { - (context_window * 9) / 10 - } - - pub fn get_model_slug(&self) -> &str { - &self.slug - } -} - -macro_rules! model_family { - ( - $slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)? - ) => {{ - // defaults - #[allow(unused_mut)] - let mut mf = ModelFamily { - slug: $slug.to_string(), - family: $family.to_string(), - needs_special_apply_patch_instructions: false, - context_window: Some(CONTEXT_WINDOW_272K), - auto_compact_token_limit: None, - supports_reasoning_summaries: false, - supports_parallel_tool_calls: false, - apply_patch_tool_type: None, - base_instructions: BASE_INSTRUCTIONS.to_string(), - experimental_supported_tools: Vec::new(), - effective_context_window_percent: 95, - support_verbosity: false, - shell_type: ConfigShellToolType::Default, - default_verbosity: None, - default_reasoning_effort: None, - truncation_policy: TruncationPolicy::Bytes(10_000), - }; - - // apply overrides - $( - mf.$key = $value; - )* - mf - }}; -} - -/// Internal offline helper for `ModelsManager` that returns a `ModelFamily` for the given -/// model slug. 
-#[allow(clippy::if_same_then_else)] -pub(super) fn find_family_for_model(slug: &str) -> ModelFamily { - if slug.starts_with("o3") { - model_family!( - slug, "o3", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - context_window: Some(200_000), - ) - } else if slug.starts_with("o4-mini") { - model_family!( - slug, "o4-mini", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - context_window: Some(200_000), - ) - } else if slug.starts_with("codex-mini-latest") { - model_family!( - slug, "codex-mini-latest", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - shell_type: ConfigShellToolType::Local, - context_window: Some(200_000), - ) - } else if slug.starts_with("gpt-4.1") { - model_family!( - slug, "gpt-4.1", - needs_special_apply_patch_instructions: true, - context_window: Some(1_047_576), - ) - } else if slug.starts_with("gpt-oss") || slug.starts_with("openai/gpt-oss") { - model_family!( - slug, "gpt-oss", - apply_patch_tool_type: Some(ApplyPatchToolType::Function), - context_window: Some(96_000), - ) - } else if slug.starts_with("gpt-4o") { - model_family!( - slug, "gpt-4o", - needs_special_apply_patch_instructions: true, - context_window: Some(128_000), - ) - } else if slug.starts_with("gpt-3.5") { - model_family!( - slug, "gpt-3.5", - needs_special_apply_patch_instructions: true, - context_window: Some(16_385), - ) - } else if slug.starts_with("test-gpt-5") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), - experimental_supported_tools: vec![ - "grep_files".to_string(), - "list_dir".to_string(), - "read_file".to_string(), - "test_sync_tool".to_string(), - ], - supports_parallel_tool_calls: true, - shell_type: ConfigShellToolType::ShellCommand, - support_verbosity: true, - truncation_policy: TruncationPolicy::Tokens(10_000), - ) - - // Experimental models. 
- } else if slug.starts_with("exp-codex") || slug.starts_with("codex-1p") { - // Same as gpt-5.1-codex-max. - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("exp-") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: BASE_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::UnifiedExec, - supports_parallel_tool_calls: true, - context_window: Some(CONTEXT_WINDOW_272K), - ) - - // Production models. 
- } else if slug.starts_with("gpt-5.2-codex") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("bengalfox") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("gpt-5.1-codex-max") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: false, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("gpt-5-codex") - || slug.starts_with("gpt-5.1-codex") - || slug.starts_with("codex-") - { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: false, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("gpt-5.2") { - model_family!( - slug, slug, - 
supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: GPT_5_2_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("boomslang") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: GPT_5_2_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("gpt-5.1") { - model_family!( - slug, "gpt-5.1", - supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else if slug.starts_with("gpt-5") { - model_family!( - slug, "gpt-5", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - shell_type: ConfigShellToolType::Default, - support_verbosity: true, - truncation_policy: TruncationPolicy::Bytes(10_000), - context_window: Some(CONTEXT_WINDOW_272K), - ) - } else { - derive_default_model_family(slug) - } -} - -fn derive_default_model_family(model: 
&str) -> ModelFamily { - tracing::warn!("Unknown model {model} is used. This will degrade the performance of Codex."); - ModelFamily { - slug: model.to_string(), - family: model.to_string(), - needs_special_apply_patch_instructions: false, - context_window: None, - auto_compact_token_limit: None, - supports_reasoning_summaries: false, - supports_parallel_tool_calls: false, - apply_patch_tool_type: None, - base_instructions: BASE_INSTRUCTIONS.to_string(), - experimental_supported_tools: Vec::new(), - effective_context_window_percent: 95, - support_verbosity: false, - shell_type: ConfigShellToolType::Default, - default_verbosity: None, - default_reasoning_effort: None, - truncation_policy: TruncationPolicy::Bytes(10_000), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use codex_protocol::openai_models::ModelVisibility; - use codex_protocol::openai_models::ReasoningEffortPreset; - use codex_protocol::openai_models::TruncationPolicyConfig; - - fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo { - ModelInfo { - slug: slug.to_string(), - display_name: slug.to_string(), - description: Some(format!("{slug} desc")), - default_reasoning_level: effort, - supported_reasoning_levels: vec![ReasoningEffortPreset { - effort, - description: effort.to_string(), - }], - shell_type: shell, - visibility: ModelVisibility::List, - supported_in_api: true, - priority: 1, - upgrade: None, - base_instructions: None, - supports_reasoning_summaries: false, - support_verbosity: false, - default_verbosity: None, - apply_patch_tool_type: None, - truncation_policy: TruncationPolicyConfig::bytes(10_000), - supports_parallel_tool_calls: false, - context_window: None, - experimental_supported_tools: Vec::new(), - } - } - - #[test] - fn remote_overrides_apply_when_slug_matches() { - let family = model_family!("gpt-4o-mini", "gpt-4o-mini"); - assert_ne!(family.default_reasoning_effort, Some(ReasoningEffort::High)); - - let updated = 
family.with_remote_overrides(vec![ - remote( - "gpt-4o-mini", - ReasoningEffort::High, - ConfigShellToolType::ShellCommand, - ), - remote( - "other-model", - ReasoningEffort::Low, - ConfigShellToolType::UnifiedExec, - ), - ]); - - assert_eq!( - updated.default_reasoning_effort, - Some(ReasoningEffort::High) - ); - assert_eq!(updated.shell_type, ConfigShellToolType::ShellCommand); - } - - #[test] - fn remote_overrides_skip_non_matching_models() { - let family = model_family!( - "codex-mini-latest", - "codex-mini-latest", - shell_type: ConfigShellToolType::Local - ); - - let updated = family.clone().with_remote_overrides(vec![remote( - "other", - ReasoningEffort::High, - ConfigShellToolType::ShellCommand, - )]); - - assert_eq!( - updated.default_reasoning_effort, - family.default_reasoning_effort - ); - assert_eq!(updated.shell_type, family.shell_type); - } - - #[test] - fn remote_overrides_apply_extended_metadata() { - let family = model_family!( - "gpt-5.1", - "gpt-5.1", - supports_reasoning_summaries: false, - support_verbosity: false, - default_verbosity: None, - apply_patch_tool_type: Some(ApplyPatchToolType::Function), - supports_parallel_tool_calls: false, - experimental_supported_tools: vec!["local".to_string()], - truncation_policy: TruncationPolicy::Bytes(10_000), - context_window: Some(100), - ); - - let updated = family.with_remote_overrides(vec![ModelInfo { - slug: "gpt-5.1".to_string(), - display_name: "gpt-5.1".to_string(), - description: Some("desc".to_string()), - default_reasoning_level: ReasoningEffort::High, - supported_reasoning_levels: vec![ReasoningEffortPreset { - effort: ReasoningEffort::High, - description: "High".to_string(), - }], - shell_type: ConfigShellToolType::ShellCommand, - visibility: ModelVisibility::List, - supported_in_api: true, - priority: 10, - upgrade: None, - base_instructions: Some("Remote instructions".to_string()), - supports_reasoning_summaries: true, - support_verbosity: true, - default_verbosity: 
Some(Verbosity::High), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - truncation_policy: TruncationPolicyConfig::tokens(2_000), - supports_parallel_tool_calls: true, - context_window: Some(400_000), - experimental_supported_tools: vec!["alpha".to_string(), "beta".to_string()], - }]); - - assert_eq!( - updated.default_reasoning_effort, - Some(ReasoningEffort::High) - ); - assert!(updated.supports_reasoning_summaries); - assert!(updated.support_verbosity); - assert_eq!(updated.default_verbosity, Some(Verbosity::High)); - assert_eq!(updated.shell_type, ConfigShellToolType::ShellCommand); - assert_eq!( - updated.apply_patch_tool_type, - Some(ApplyPatchToolType::Freeform) - ); - assert_eq!(updated.truncation_policy, TruncationPolicy::Tokens(2_000)); - assert!(updated.supports_parallel_tool_calls); - assert_eq!(updated.context_window, Some(400_000)); - assert_eq!( - updated.experimental_supported_tools, - vec!["alpha".to_string(), "beta".to_string()] - ); - assert_eq!(updated.base_instructions, "Remote instructions"); - } -} diff --git a/codex-rs/core/src/models_manager/model_info.rs b/codex-rs/core/src/models_manager/model_info.rs new file mode 100644 index 00000000000..6e46b93586a --- /dev/null +++ b/codex-rs/core/src/models_manager/model_info.rs @@ -0,0 +1,363 @@ +use codex_protocol::config_types::Verbosity; +use codex_protocol::openai_models::ApplyPatchToolType; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::TruncationMode; +use codex_protocol::openai_models::TruncationPolicyConfig; + +use crate::config::Config; +use crate::truncate::approx_bytes_for_tokens; +use tracing::warn; + +const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md"); +const BASE_INSTRUCTIONS_WITH_APPLY_PATCH: &str = + 
include_str!("../../prompt_with_apply_patch_instructions.md"); + +const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt_5_codex_prompt.md"); +const GPT_5_1_INSTRUCTIONS: &str = include_str!("../../gpt_5_1_prompt.md"); +const GPT_5_2_INSTRUCTIONS: &str = include_str!("../../gpt_5_2_prompt.md"); +const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../../gpt-5.1-codex-max_prompt.md"); +const GPT_5_2_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt-5.2-codex_prompt.md"); + +pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000; + +macro_rules! model_info { + ( + $slug:expr $(, $key:ident : $value:expr )* $(,)? + ) => {{ + #[allow(unused_mut)] + let mut model = ModelInfo { + slug: $slug.to_string(), + display_name: $slug.to_string(), + description: None, + // This is primarily used when remote metadata is available. When running + // offline, core generally omits the effort field unless explicitly + // configured by the user. + default_reasoning_level: None, + supported_reasoning_levels: supported_reasoning_level_low_medium_high(), + shell_type: ConfigShellToolType::Default, + visibility: ModelVisibility::None, + supported_in_api: true, + priority: 99, + upgrade: None, + base_instructions: BASE_INSTRUCTIONS.to_string(), + supports_reasoning_summaries: false, + support_verbosity: false, + default_verbosity: None, + apply_patch_tool_type: None, + truncation_policy: TruncationPolicyConfig::bytes(10_000), + supports_parallel_tool_calls: false, + context_window: Some(CONTEXT_WINDOW_272K), + auto_compact_token_limit: None, + effective_context_window_percent: 95, + experimental_supported_tools: Vec::new(), + }; + + $( + model.$key = $value; + )* + model + }}; +} + +pub(crate) fn with_config_overrides(mut model: ModelInfo, config: &Config) -> ModelInfo { + if let Some(supports_reasoning_summaries) = config.model_supports_reasoning_summaries { + model.supports_reasoning_summaries = supports_reasoning_summaries; + } + if let Some(context_window) = 
config.model_context_window { + model.context_window = Some(context_window); + } + if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit { + model.auto_compact_token_limit = Some(auto_compact_token_limit); + } + if let Some(token_limit) = config.tool_output_token_limit { + model.truncation_policy = match model.truncation_policy.mode { + TruncationMode::Bytes => { + let byte_limit = + i64::try_from(approx_bytes_for_tokens(token_limit)).unwrap_or(i64::MAX); + TruncationPolicyConfig::bytes(byte_limit) + } + TruncationMode::Tokens => { + let limit = i64::try_from(token_limit).unwrap_or(i64::MAX); + TruncationPolicyConfig::tokens(limit) + } + }; + } + model +} + +// todo(aibrahim): remove most of the entries here when enabling models.json +pub(crate) fn find_model_info_for_slug(slug: &str) -> ModelInfo { + if slug.starts_with("o3") || slug.starts_with("o4-mini") { + model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + supports_reasoning_summaries: true, + context_window: Some(200_000), + ) + } else if slug.starts_with("codex-mini-latest") { + model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + shell_type: ConfigShellToolType::Local, + supports_reasoning_summaries: true, + context_window: Some(200_000), + ) + } else if slug.starts_with("gpt-4.1") { + model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + supports_reasoning_summaries: false, + context_window: Some(1_047_576), + ) + } else if slug.starts_with("gpt-oss") || slug.starts_with("openai/gpt-oss") { + model_info!( + slug, + apply_patch_tool_type: Some(ApplyPatchToolType::Function), + context_window: Some(96_000), + ) + } else if slug.starts_with("gpt-4o") { + model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + supports_reasoning_summaries: false, + context_window: Some(128_000), + ) + } else if slug.starts_with("gpt-3.5") { + 
model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + supports_reasoning_summaries: false, + context_window: Some(16_385), + ) + } else if slug.starts_with("test-gpt-5") { + model_info!( + slug, + base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), + experimental_supported_tools: vec![ + "grep_files".to_string(), + "list_dir".to_string(), + "read_file".to_string(), + "test_sync_tool".to_string(), + ], + supports_parallel_tool_calls: true, + supports_reasoning_summaries: true, + shell_type: ConfigShellToolType::ShellCommand, + support_verbosity: true, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + ) + } else if slug.starts_with("exp-codex") || slug.starts_with("codex-1p") { + model_info!( + slug, + base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + supports_reasoning_summaries: true, + support_verbosity: false, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("exp-") { + model_info!( + slug, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + supports_reasoning_summaries: true, + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: BASE_INSTRUCTIONS.to_string(), + default_reasoning_level: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicyConfig::bytes(10_000), + shell_type: ConfigShellToolType::UnifiedExec, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("gpt-5.2-codex") || slug.starts_with("bengalfox") { + model_info!( + slug, + base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + 
supports_reasoning_summaries: true, + support_verbosity: false, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + supported_reasoning_levels: supported_reasoning_level_low_medium_high_xhigh(), + ) + } else if slug.starts_with("gpt-5.1-codex-max") { + model_info!( + slug, + base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: false, + supports_reasoning_summaries: true, + support_verbosity: false, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + supported_reasoning_levels: supported_reasoning_level_low_medium_high_xhigh(), + ) + } else if (slug.starts_with("gpt-5-codex") + || slug.starts_with("gpt-5.1-codex") + || slug.starts_with("codex-")) + && !slug.contains("-mini") + { + model_info!( + slug, + base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: false, + supports_reasoning_summaries: true, + support_verbosity: false, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + supported_reasoning_levels: supported_reasoning_level_low_medium_high(), + ) + } else if slug.starts_with("gpt-5-codex") + || slug.starts_with("gpt-5.1-codex") + || slug.starts_with("codex-") + { + model_info!( + slug, + base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: false, + supports_reasoning_summaries: true, + support_verbosity: false, + truncation_policy: TruncationPolicyConfig::tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if (slug.starts_with("gpt-5.2") 
|| slug.starts_with("boomslang")) + && !slug.contains("codex") + { + model_info!( + slug, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + supports_reasoning_summaries: true, + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: GPT_5_2_INSTRUCTIONS.to_string(), + default_reasoning_level: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicyConfig::bytes(10_000), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + supported_reasoning_levels: supported_reasoning_level_low_medium_high_xhigh_non_codex(), + ) + } else if slug.starts_with("gpt-5.1") && !slug.contains("codex") { + model_info!( + slug, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + supports_reasoning_summaries: true, + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), + default_reasoning_level: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicyConfig::bytes(10_000), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + supported_reasoning_levels: supported_reasoning_level_low_medium_high_non_codex(), + ) + } else if slug.starts_with("gpt-5") { + model_info!( + slug, + base_instructions: BASE_INSTRUCTIONS_WITH_APPLY_PATCH.to_string(), + shell_type: ConfigShellToolType::Default, + supports_reasoning_summaries: true, + support_verbosity: true, + truncation_policy: TruncationPolicyConfig::bytes(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else { + warn!("Unknown model {slug} is used. 
This will degrade the performance of Codex."); + model_info!( + slug, + context_window: None, + supported_reasoning_levels: Vec::new(), + default_reasoning_level: None + ) + } +} + +fn supported_reasoning_level_low_medium_high() -> Vec<ReasoningEffortPreset> { + vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: "Fast responses with lighter reasoning".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: "Balances speed and reasoning depth for everyday tasks".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "Greater reasoning depth for complex problems".to_string(), + }, + ] +} + +fn supported_reasoning_level_low_medium_high_non_codex() -> Vec<ReasoningEffortPreset> { + vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), + }, + ] +} + +fn supported_reasoning_level_low_medium_high_xhigh() -> Vec<ReasoningEffortPreset> { + vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: "Fast responses with lighter reasoning".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: "Balances speed and reasoning depth for everyday tasks".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "Greater reasoning depth for complex problems".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::XHigh, + description: "Extra high reasoning depth for complex problems".to_string(), + }, + ] +} + +fn supported_reasoning_level_low_medium_high_xhigh_non_codex() -> Vec<ReasoningEffortPreset> { + 
vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::XHigh, + description: "Extra high reasoning for complex problems".to_string(), + }, + ] +} diff --git a/codex-rs/core/src/models_manager/model_presets.rs b/codex-rs/core/src/models_manager/model_presets.rs index 0a7e7857843..080c44433bc 100644 --- a/codex-rs/core/src/models_manager/model_presets.rs +++ b/codex-rs/core/src/models_manager/model_presets.rs @@ -112,7 +112,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| { }, ReasoningEffortPreset { effort: ReasoningEffort::XHigh, - description: "Extra high reasoning for complex problems".to_string(), + description: "Extra high reasoning depth for complex problems".to_string(), }, ], is_default: false, @@ -170,7 +170,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| { }, ReasoningEffortPreset { effort: ReasoningEffort::XHigh, - description: "Extra high reasoning for complex problems".to_string(), + description: "Extra high reasoning depth for complex problems".to_string(), }, ], is_default: false, @@ -322,11 +322,7 @@ fn gpt_52_codex_upgrade() -> ModelUpgrade { } pub(super) fn builtin_model_presets(_auth_mode: Option<AuthMode>) -> Vec<ModelPreset> { - PRESETS - .iter() - .filter(|preset| preset.show_in_picker) - .cloned() - .collect() + PRESETS.iter().cloned().collect() } #[cfg(any(test, feature = "test-support"))] diff --git a/codex-rs/core/src/otel_init.rs b/codex-rs/core/src/otel_init.rs index ece5a6bf500..f9bf75e8acb 100644 --- a/codex-rs/core/src/otel_init.rs +++ 
b/codex-rs/core/src/otel_init.rs @@ -6,7 +6,7 @@ use codex_otel::config::OtelExporter; use codex_otel::config::OtelHttpProtocol; use codex_otel::config::OtelSettings; use codex_otel::config::OtelTlsConfig as OtelTlsSettings; -use codex_otel::otel_provider::OtelProvider; +use codex_otel::traces::otel_provider::OtelProvider; use std::error::Error; /// Build an OpenTelemetry provider from the app Config. @@ -18,6 +18,7 @@ pub fn build_provider( ) -> Result, Box> { let to_otel_exporter = |kind: &Kind| match kind { Kind::None => OtelExporter::None, + Kind::Statsig => OtelExporter::Statsig, Kind::OtlpHttp { endpoint, headers, @@ -63,6 +64,11 @@ pub fn build_provider( let exporter = to_otel_exporter(&config.otel.exporter); let trace_exporter = to_otel_exporter(&config.otel.trace_exporter); + let metrics_exporter = if config.analytics { + to_otel_exporter(&config.otel.metrics_exporter) + } else { + OtelExporter::None + }; OtelProvider::from(&OtelSettings { service_name: originator().value.to_owned(), @@ -71,6 +77,7 @@ pub fn build_provider( environment: config.otel.environment.to_string(), exporter, trace_exporter, + metrics_exporter, }) } diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index cb2499cbbbc..79f82c45985 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -513,9 +513,9 @@ mod tests { ) .unwrap_or_else(|_| cfg.codex_home.join("skills/pdf-processing/SKILL.md")); let expected_path_str = expected_path.to_string_lossy().replace('\\', "/"); - let usage_rules = "- Discovery: Available skills are listed in project docs and may also appear in a runtime \"## Skills\" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. 
Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. If unsure, ask a brief clarification before proceeding.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; + let usage_rules = "- Discovery: The list above is the skills available in this session (name + description + file path). Skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description shown above, you must use that skill for that turn. Multiple mentions mean use them all. 
Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deep reference-chasing: prefer opening only files directly linked from `SKILL.md` unless you're blocked.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; let expected = format!( - "base doc\n\n## Skills\nThese skills are discovered at startup from multiple local sources. Each entry includes a name, description, and file path so you can open the source for full instructions.\n- pdf-processing: extract from pdfs (file: {expected_path_str})\n{usage_rules}" + "base doc\n\n## Skills\nA skill is a set of local instructions to follow that is stored in a `SKILL.md` file. Below is the list of skills that can be used. 
Each entry includes a name, description, and file path so you can open the source for full instructions when using a specific skill.\n### Available skills\n- pdf-processing: extract from pdfs (file: {expected_path_str})\n### How to use skills\n{usage_rules}" ); assert_eq!(res, expected); } @@ -537,9 +537,9 @@ mod tests { dunce::canonicalize(cfg.codex_home.join("skills/linting/SKILL.md").as_path()) .unwrap_or_else(|_| cfg.codex_home.join("skills/linting/SKILL.md")); let expected_path_str = expected_path.to_string_lossy().replace('\\', "/"); - let usage_rules = "- Discovery: Available skills are listed in project docs and may also appear in a runtime \"## Skills\" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. 
If unsure, ask a brief clarification before proceeding.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; + let usage_rules = "- Discovery: The list above is the skills available in this session (name + description + file path). Skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description shown above, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. 
Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deep reference-chasing: prefer opening only files directly linked from `SKILL.md` unless you're blocked.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; let expected = format!( - "## Skills\nThese skills are discovered at startup from multiple local sources. Each entry includes a name, description, and file path so you can open the source for full instructions.\n- linting: run clippy (file: {expected_path_str})\n{usage_rules}" + "## Skills\nA skill is a set of local instructions to follow that is stored in a `SKILL.md` file. Below is the list of skills that can be used. 
Each entry includes a name, description, and file path so you can open the source for full instructions when using a specific skill.\n### Available skills\n- linting: run clippy (file: {expected_path_str})\n### How to use skills\n{usage_rules}" ); assert_eq!(res, expected); } diff --git a/codex-rs/core/src/rollout/error.rs b/codex-rs/core/src/rollout/error.rs index e924dd2d28b..ee48bb20295 100644 --- a/codex-rs/core/src/rollout/error.rs +++ b/codex-rs/core/src/rollout/error.rs @@ -33,7 +33,7 @@ fn map_rollout_io_error(io_err: &std::io::Error, codex_home: &Path) -> Option format!( - "Session data under {} looks corrupt or unreadable. Clearing the sessions directory may help (this will remove saved conversations).", + "Session data under {} looks corrupt or unreadable. Clearing the sessions directory may help (this will remove saved threads).", sessions_dir.display() ), ErrorKind::IsADirectory | ErrorKind::NotADirectory => format!( diff --git a/codex-rs/core/src/rollout/list.rs b/codex-rs/core/src/rollout/list.rs index e2ef0e883c6..487304ddc80 100644 --- a/codex-rs/core/src/rollout/list.rs +++ b/codex-rs/core/src/rollout/list.rs @@ -20,11 +20,11 @@ use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; use codex_protocol::protocol::SessionSource; -/// Returned page of conversation summaries. +/// Returned page of thread (thread) summaries. #[derive(Debug, Default, PartialEq)] -pub struct ConversationsPage { - /// Conversation summaries ordered newest first. - pub items: Vec, +pub struct ThreadsPage { + /// Thread summaries ordered newest first. + pub items: Vec, /// Opaque pagination token to resume after the last item, or `None` if end. pub next_cursor: Option, /// Total number of files touched while scanning this request. @@ -33,9 +33,9 @@ pub struct ConversationsPage { pub reached_scan_cap: bool, } -/// Summary information for a conversation rollout file. +/// Summary information for a thread rollout file. 
#[derive(Debug, PartialEq)] -pub struct ConversationItem { +pub struct ThreadItem { /// Absolute path to the rollout file. pub path: PathBuf, /// First up to `HEAD_RECORD_LIMIT` JSONL records parsed as JSON (includes meta line). @@ -46,6 +46,13 @@ pub struct ConversationItem { pub updated_at: Option, } +#[allow(dead_code)] +#[deprecated(note = "use ThreadItem")] +pub type ConversationItem = ThreadItem; +#[allow(dead_code)] +#[deprecated(note = "use ThreadsPage")] +pub type ConversationsPage = ThreadsPage; + #[derive(Default)] struct HeadTailSummary { head: Vec, @@ -99,22 +106,22 @@ impl<'de> serde::Deserialize<'de> for Cursor { } } -/// Retrieve recorded conversation file paths with token pagination. The returned `next_cursor` +/// Retrieve recorded thread file paths with token pagination. The returned `next_cursor` /// can be supplied on the next call to resume after the last returned item, resilient to /// concurrent new sessions being appended. Ordering is stable by timestamp desc, then UUID desc. -pub(crate) async fn get_conversations( +pub(crate) async fn get_threads( codex_home: &Path, page_size: usize, cursor: Option<&Cursor>, allowed_sources: &[SessionSource], model_providers: Option<&[String]>, default_provider: &str, -) -> io::Result { +) -> io::Result { let mut root = codex_home.to_path_buf(); root.push(SESSIONS_SUBDIR); if !root.exists() { - return Ok(ConversationsPage { + return Ok(ThreadsPage { items: Vec::new(), next_cursor: None, num_scanned_files: 0, @@ -138,7 +145,7 @@ pub(crate) async fn get_conversations( Ok(result) } -/// Load conversation file paths from disk using directory traversal. +/// Load thread file paths from disk using directory traversal. /// /// Directory layout: `~/.codex/sessions/YYYY/MM/DD/rollout-YYYY-MM-DDThh-mm-ss-.jsonl` /// Returned newest (latest) first. 
@@ -148,8 +155,8 @@ async fn traverse_directories_for_paths( anchor: Option, allowed_sources: &[SessionSource], provider_matcher: Option<&ProviderMatcher<'_>>, -) -> io::Result { - let mut items: Vec = Vec::with_capacity(page_size); +) -> io::Result { + let mut items: Vec = Vec::with_capacity(page_size); let mut scanned_files = 0usize; let mut anchor_passed = anchor.is_none(); let (anchor_ts, anchor_id) = match anchor { @@ -232,7 +239,7 @@ async fn traverse_directories_for_paths( .unwrap_or(None) .or_else(|| created_at.clone()); } - items.push(ConversationItem { + items.push(ThreadItem { path, head, created_at, @@ -254,7 +261,7 @@ async fn traverse_directories_for_paths( } else { None }; - Ok(ConversationsPage { + Ok(ThreadsPage { items, next_cursor: next, num_scanned_files: scanned_files, @@ -279,7 +286,7 @@ pub fn parse_cursor(token: &str) -> Option { Some(Cursor::new(ts, uuid)) } -fn build_next_cursor(items: &[ConversationItem]) -> Option { +fn build_next_cursor(items: &[ThreadItem]) -> Option { let last = items.last()?; let file_name = last.path.file_name()?.to_string_lossy(); let (ts, id) = parse_timestamp_uuid_from_filename(&file_name)?; @@ -455,10 +462,10 @@ async fn file_modified_rfc3339(path: &Path) -> io::Result> { Ok(dt.format(&Rfc3339).ok()) } -/// Locate a recorded conversation rollout file by its UUID string using the existing +/// Locate a recorded thread rollout file by its UUID string using the existing /// paginated listing implementation. Returns `Ok(Some(path))` if found, `Ok(None)` if not present /// or the id is invalid. 
-pub async fn find_conversation_path_by_id_str( +pub async fn find_thread_path_by_id_str( codex_home: &Path, id_str: &str, ) -> io::Result> { diff --git a/codex-rs/core/src/rollout/mod.rs b/codex-rs/core/src/rollout/mod.rs index 540d204be3e..5b65bada7c4 100644 --- a/codex-rs/core/src/rollout/mod.rs +++ b/codex-rs/core/src/rollout/mod.rs @@ -11,10 +11,13 @@ pub(crate) mod error; pub mod list; pub(crate) mod policy; pub mod recorder; +pub(crate) mod truncation; pub use codex_protocol::protocol::SessionMeta; pub(crate) use error::map_session_init_error; -pub use list::find_conversation_path_by_id_str; +pub use list::find_thread_path_by_id_str; +#[deprecated(note = "use find_thread_path_by_id_str")] +pub use list::find_thread_path_by_id_str as find_conversation_path_by_id_str; pub use recorder::RolloutRecorder; pub use recorder::RolloutRecorderParams; diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 07c8af1144b..6c02ad09425 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -45,6 +45,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::ContextCompacted(_) | EventMsg::EnteredReviewMode(_) | EventMsg::ExitedReviewMode(_) + | EventMsg::ThreadRolledBack(_) | EventMsg::UndoCompleted(_) | EventMsg::TurnAborted(_) => true, EventMsg::Error(_) diff --git a/codex-rs/core/src/rollout/recorder.rs b/codex-rs/core/src/rollout/recorder.rs index a39f85c823d..a61f40d05e7 100644 --- a/codex-rs/core/src/rollout/recorder.rs +++ b/codex-rs/core/src/rollout/recorder.rs @@ -6,7 +6,7 @@ use std::io::Error as IoError; use std::path::Path; use std::path::PathBuf; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use serde_json::Value; use time::OffsetDateTime; use time::format_description::FormatItem; @@ -19,9 +19,9 @@ use tracing::info; use tracing::warn; use super::SESSIONS_SUBDIR; -use super::list::ConversationsPage; use super::list::Cursor; -use 
super::list::get_conversations; +use super::list::ThreadsPage; +use super::list::get_threads; use super::policy::is_persisted_response_item; use crate::config::Config; use crate::default_client::originator; @@ -52,7 +52,7 @@ pub struct RolloutRecorder { #[derive(Clone)] pub enum RolloutRecorderParams { Create { - conversation_id: ConversationId, + conversation_id: ThreadId, instructions: Option, source: SessionSource, }, @@ -67,6 +67,11 @@ enum RolloutCmd { Flush { ack: oneshot::Sender<()>, }, + /// Rewrite the first SessionMeta line in the rollout file to include a name. + SetSessionName { + name: String, + ack: oneshot::Sender>, + }, Shutdown { ack: oneshot::Sender<()>, }, @@ -74,7 +79,7 @@ enum RolloutCmd { impl RolloutRecorderParams { pub fn new( - conversation_id: ConversationId, + conversation_id: ThreadId, instructions: Option, source: SessionSource, ) -> Self { @@ -91,16 +96,16 @@ impl RolloutRecorderParams { } impl RolloutRecorder { - /// List conversations (rollout files) under the provided Codex home directory. - pub async fn list_conversations( + /// List threads (rollout files) under the provided Codex home directory. + pub async fn list_threads( codex_home: &Path, page_size: usize, cursor: Option<&Cursor>, allowed_sources: &[SessionSource], model_providers: Option<&[String]>, default_provider: &str, - ) -> std::io::Result { - get_conversations( + ) -> std::io::Result { + get_threads( codex_home, page_size, cursor, @@ -143,6 +148,7 @@ impl RolloutRecorder { id: session_id, timestamp, cwd: config.cwd.clone(), + name: None, originator: originator().value.clone(), cli_version: env!("CARGO_PKG_VERSION").to_string(), instructions, @@ -172,7 +178,7 @@ impl RolloutRecorder { // Spawn a Tokio task that owns the file handle and performs async // writes. Using `tokio::fs::File` keeps everything on the async I/O // driver instead of blocking the runtime. 
- tokio::task::spawn(rollout_writer(file, rx, meta, cwd)); + tokio::task::spawn(rollout_writer(file, rx, meta, cwd, rollout_path.clone())); Ok(Self { tx, rollout_path }) } @@ -207,6 +213,16 @@ impl RolloutRecorder { .map_err(|e| IoError::other(format!("failed waiting for rollout flush: {e}"))) } + pub async fn set_session_name(&self, name: String) -> std::io::Result<()> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(RolloutCmd::SetSessionName { name, ack: tx }) + .await + .map_err(|e| IoError::other(format!("failed to queue session name update: {e}")))?; + rx.await + .map_err(|e| IoError::other(format!("failed waiting for session name update: {e}")))? + } + pub async fn get_rollout_history(path: &Path) -> std::io::Result { info!("Resuming rollout from {path:?}"); let text = tokio::fs::read_to_string(path).await?; @@ -215,7 +231,7 @@ impl RolloutRecorder { } let mut items: Vec = Vec::new(); - let mut conversation_id: Option = None; + let mut thread_id: Option = None; for line in text.lines() { if line.trim().is_empty() { continue; @@ -233,9 +249,9 @@ impl RolloutRecorder { Ok(rollout_line) => match rollout_line.item { RolloutItem::SessionMeta(session_meta_line) => { // Use the FIRST SessionMeta encountered in the file as the canonical - // conversation id and main session information. Keep all items intact. - if conversation_id.is_none() { - conversation_id = Some(session_meta_line.meta.id); + // thread id and main session information. Keep all items intact. 
+ if thread_id.is_none() { + thread_id = Some(session_meta_line.meta.id); } items.push(RolloutItem::SessionMeta(session_meta_line)); } @@ -259,12 +275,12 @@ impl RolloutRecorder { } info!( - "Resumed rollout with {} items, conversation ID: {:?}", + "Resumed rollout with {} items, thread ID: {:?}", items.len(), - conversation_id + thread_id ); - let conversation_id = conversation_id - .ok_or_else(|| IoError::other("failed to parse conversation ID from rollout file"))?; + let conversation_id = thread_id + .ok_or_else(|| IoError::other("failed to parse thread ID from rollout file"))?; if items.is_empty() { return Ok(InitialHistory::New); @@ -302,16 +318,13 @@ struct LogFileInfo { path: PathBuf, /// Session ID (also embedded in filename). - conversation_id: ConversationId, + conversation_id: ThreadId, /// Timestamp for the start of the session. timestamp: OffsetDateTime, } -fn create_log_file( - config: &Config, - conversation_id: ConversationId, -) -> std::io::Result { +fn create_log_file(config: &Config, conversation_id: ThreadId) -> std::io::Result { // Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing. 
let timestamp = OffsetDateTime::now_local() .map_err(|e| IoError::other(format!("failed to get local time: {e}")))?; @@ -351,6 +364,7 @@ async fn rollout_writer( mut rx: mpsc::Receiver, mut meta: Option, cwd: std::path::PathBuf, + rollout_path: PathBuf, ) -> std::io::Result<()> { let mut writer = JsonlWriter { file }; @@ -386,6 +400,10 @@ async fn rollout_writer( } let _ = ack.send(()); } + RolloutCmd::SetSessionName { name, ack } => { + let result = rewrite_session_name(&mut writer, &rollout_path, &name).await; + let _ = ack.send(result); + } RolloutCmd::Shutdown { ack } => { let _ = ack.send(()); } @@ -395,6 +413,118 @@ async fn rollout_writer( Ok(()) } +async fn rewrite_session_name( + writer: &mut JsonlWriter, + rollout_path: &Path, + name: &str, +) -> std::io::Result<()> { + // Flush and close the writer's file handle before swapping the on-disk file, + // otherwise subsequent appends would keep writing to the old inode/handle. + writer.file.flush().await?; + + // Compute the rewritten contents first so any read/parse/legacy-format errors + // don't disturb the active writer handle. + let rewritten_contents = rewrite_first_session_meta_line_name(rollout_path, name).await?; + + // Close the active handle using a portable placeholder. + let placeholder = tokio::fs::File::from_std(tempfile::tempfile()?); + let old_file = std::mem::replace(&mut writer.file, placeholder); + drop(old_file); + + if let Err(e) = replace_rollout_file(rollout_path, rewritten_contents).await { + // Best-effort: ensure the writer keeps pointing at the rollout file, not the placeholder. + let reopened = tokio::fs::OpenOptions::new() + .append(true) + .create(true) + .open(rollout_path) + .await; + if let Ok(reopened) = reopened { + let placeholder = std::mem::replace(&mut writer.file, reopened); + drop(placeholder); + } + return Err(e); + } + + // Re-open the rollout for appends and drop the placeholder handle. 
+ let reopened = tokio::fs::OpenOptions::new() + .append(true) + .open(rollout_path) + .await?; + let placeholder = std::mem::replace(&mut writer.file, reopened); + drop(placeholder); + + Ok(()) +} + +async fn rewrite_first_session_meta_line_name( + rollout_path: &Path, + name: &str, +) -> std::io::Result { + let text = tokio::fs::read_to_string(rollout_path).await?; + let mut rewritten = false; + + // Rewrite the first non-empty line only. Since 43809a454 ("Introduce rollout items", + // 2025-09-09), rollouts we write always start with a RolloutLine wrapping + // RolloutItem::SessionMeta(_). + let mut out = String::with_capacity(text.len() + 32); + for line in text.lines() { + if !rewritten && !line.trim().is_empty() { + out.push_str(&rewrite_session_meta_line_name(line, name)?); + rewritten = true; + } else { + out.push_str(line); + } + out.push('\n'); + } + + if !rewritten { + return Err(IoError::other( + "failed to set session name: rollout has no SessionMeta line", + )); + } + + Ok(out) +} + +fn rewrite_session_meta_line_name(line: &str, name: &str) -> std::io::Result { + let mut rollout_line = serde_json::from_str::(line).map_err(IoError::other)?; + let RolloutItem::SessionMeta(meta_line) = &mut rollout_line.item else { + return Err(IoError::other( + "failed to set session name: rollout has no SessionMeta line", + )); + }; + + meta_line.meta.name = Some(name.to_string()); + serde_json::to_string(&rollout_line).map_err(IoError::other) +} + +async fn replace_rollout_file(path: &Path, contents: String) -> std::io::Result<()> { + let Some(dir) = path.parent() else { + return Err(IoError::other("rollout path has no parent directory")); + }; + + let mut tmp = tempfile::NamedTempFile::new_in(dir)?; + use std::io::Write as _; + tmp.write_all(contents.as_bytes())?; + tmp.flush()?; + + let (_file, tmp_path) = tmp.keep()?; + drop(_file); + + #[cfg(windows)] + { + let _ = std::fs::remove_file(path); + std::fs::rename(&tmp_path, path)?; + } + + #[cfg(not(windows))] + { + 
std::fs::rename(&tmp_path, path)?; + } + + Ok(()) +} + struct JsonlWriter { file: tokio::fs::File, } @@ -422,3 +552,64 @@ impl JsonlWriter { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use codex_protocol::ThreadId; + use pretty_assertions::assert_eq; + use tokio::io::AsyncWriteExt; + + #[tokio::test] + async fn set_session_name_rewrites_first_session_meta_line() -> std::io::Result<()> { + let config = crate::config::test_config(); + + let conversation_id = ThreadId::new(); + let recorder = RolloutRecorder::new( + &config, + RolloutRecorderParams::new(conversation_id, None, SessionSource::Cli), + ) + .await?; + + recorder + .set_session_name("My Session Name".to_string()) + .await?; + + let text = tokio::fs::read_to_string(&recorder.rollout_path).await?; + let first_line = text.lines().find(|l| !l.trim().is_empty()).unwrap_or(""); + let rollout_line: RolloutLine = serde_json::from_str(first_line)?; + let RolloutItem::SessionMeta(meta_line) = rollout_line.item else { + panic!("expected SessionMeta as first rollout line"); + }; + assert_eq!(meta_line.meta.name.as_deref(), Some("My Session Name")); + Ok(()) + } + + #[tokio::test] + async fn set_session_name_failure_does_not_redirect_future_writes() -> std::io::Result<()> { + let dir = tempfile::tempdir()?; + let rollout_path = dir.path().join("rollout.jsonl"); + + // Invalid JSON as the first non-empty line triggers a parse error in the rewrite step. 
+ tokio::fs::write(&rollout_path, "{\n").await?; + + let file = tokio::fs::OpenOptions::new() + .append(true) + .open(&rollout_path) + .await?; + let mut writer = JsonlWriter { file }; + + assert!( + rewrite_session_name(&mut writer, &rollout_path, "name") + .await + .is_err() + ); + + writer.file.write_all(b"AFTER\n").await?; + writer.file.flush().await?; + + let text = tokio::fs::read_to_string(&rollout_path).await?; + assert!(text.trim_end().ends_with("AFTER")); + Ok(()) + } +} diff --git a/codex-rs/core/src/rollout/tests.rs b/codex-rs/core/src/rollout/tests.rs index 1df3659ba0f..51f842eecef 100644 --- a/codex-rs/core/src/rollout/tests.rs +++ b/codex-rs/core/src/rollout/tests.rs @@ -13,12 +13,12 @@ use time::macros::format_description; use uuid::Uuid; use crate::rollout::INTERACTIVE_SESSION_SOURCES; -use crate::rollout::list::ConversationItem; -use crate::rollout::list::ConversationsPage; use crate::rollout::list::Cursor; -use crate::rollout::list::get_conversations; +use crate::rollout::list::ThreadItem; +use crate::rollout::list::ThreadsPage; +use crate::rollout::list::get_threads; use anyhow::Result; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::EventMsg; @@ -162,7 +162,7 @@ async fn test_list_conversations_latest_first() { .unwrap(); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let page = get_conversations( + let page = get_threads( home, 10, None, @@ -227,21 +227,21 @@ async fn test_list_conversations_latest_first() { let updated_times: Vec> = page.items.iter().map(|i| i.updated_at.clone()).collect(); - let expected = ConversationsPage { + let expected = ThreadsPage { items: vec![ - ConversationItem { + ThreadItem { path: p1, head: head_3, created_at: Some("2025-01-03T12-00-00".into()), updated_at: updated_times.first().cloned().flatten(), }, - ConversationItem { + ThreadItem { path: p2, head: head_2, created_at: 
Some("2025-01-02T12-00-00".into()), updated_at: updated_times.get(1).cloned().flatten(), }, - ConversationItem { + ThreadItem { path: p3, head: head_1, created_at: Some("2025-01-01T12-00-00".into()), @@ -311,7 +311,7 @@ async fn test_pagination_cursor() { .unwrap(); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let page1 = get_conversations( + let page1 = get_threads( home, 2, None, @@ -357,15 +357,15 @@ async fn test_pagination_cursor() { page1.items.iter().map(|i| i.updated_at.clone()).collect(); let expected_cursor1: Cursor = serde_json::from_str(&format!("\"2025-03-04T09-00-00|{u4}\"")).unwrap(); - let expected_page1 = ConversationsPage { + let expected_page1 = ThreadsPage { items: vec![ - ConversationItem { + ThreadItem { path: p5, head: head_5, created_at: Some("2025-03-05T09-00-00".into()), updated_at: updated_page1.first().cloned().flatten(), }, - ConversationItem { + ThreadItem { path: p4, head: head_4, created_at: Some("2025-03-04T09-00-00".into()), @@ -378,7 +378,7 @@ async fn test_pagination_cursor() { }; assert_eq!(page1, expected_page1); - let page2 = get_conversations( + let page2 = get_threads( home, 2, page1.next_cursor.as_ref(), @@ -424,15 +424,15 @@ async fn test_pagination_cursor() { page2.items.iter().map(|i| i.updated_at.clone()).collect(); let expected_cursor2: Cursor = serde_json::from_str(&format!("\"2025-03-02T09-00-00|{u2}\"")).unwrap(); - let expected_page2 = ConversationsPage { + let expected_page2 = ThreadsPage { items: vec![ - ConversationItem { + ThreadItem { path: p3, head: head_3, created_at: Some("2025-03-03T09-00-00".into()), updated_at: updated_page2.first().cloned().flatten(), }, - ConversationItem { + ThreadItem { path: p2, head: head_2, created_at: Some("2025-03-02T09-00-00".into()), @@ -445,7 +445,7 @@ async fn test_pagination_cursor() { }; assert_eq!(page2, expected_page2); - let page3 = get_conversations( + let page3 = get_threads( home, 2, page2.next_cursor.as_ref(), @@ -473,8 +473,8 @@ async fn 
test_pagination_cursor() { })]; let updated_page3: Vec> = page3.items.iter().map(|i| i.updated_at.clone()).collect(); - let expected_page3 = ConversationsPage { - items: vec![ConversationItem { + let expected_page3 = ThreadsPage { + items: vec![ThreadItem { path: p1, head: head_1, created_at: Some("2025-03-01T09-00-00".into()), @@ -488,7 +488,7 @@ async fn test_pagination_cursor() { } #[tokio::test] -async fn test_get_conversation_contents() { +async fn test_get_thread_contents() { let temp = TempDir::new().unwrap(); let home = temp.path(); @@ -497,7 +497,7 @@ async fn test_get_conversation_contents() { write_session_file(home, ts, uuid, 2, Some(SessionSource::VSCode)).unwrap(); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let page = get_conversations( + let page = get_threads( home, 1, None, @@ -528,8 +528,8 @@ async fn test_get_conversation_contents() { "source": "vscode", "model_provider": "test-provider", })]; - let expected_page = ConversationsPage { - items: vec![ConversationItem { + let expected_page = ThreadsPage { + items: vec![ThreadItem { path: expected_path, head: expected_head, created_at: Some(ts.into()), @@ -579,7 +579,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> { let file_path = day_dir.join(format!("rollout-{ts}-{uuid}.jsonl")); let mut file = File::create(&file_path)?; - let conversation_id = ConversationId::from_string(&uuid.to_string())?; + let conversation_id = ThreadId::from_string(&uuid.to_string())?; let meta_line = RolloutLine { timestamp: ts.to_string(), item: RolloutItem::SessionMeta(SessionMetaLine { @@ -588,6 +588,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> { timestamp: ts.to_string(), instructions: None, cwd: ".".into(), + name: None, originator: "test_originator".into(), cli_version: "test_version".into(), source: SessionSource::VSCode, @@ -624,7 +625,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> { drop(file); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let 
page = get_conversations( + let page = get_threads( home, 1, None, @@ -663,7 +664,7 @@ async fn test_stable_ordering_same_second_pagination() { write_session_file(home, ts, u3, 0, Some(SessionSource::VSCode)).unwrap(); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let page1 = get_conversations( + let page1 = get_threads( home, 2, None, @@ -701,15 +702,15 @@ async fn test_stable_ordering_same_second_pagination() { let updated_page1: Vec> = page1.items.iter().map(|i| i.updated_at.clone()).collect(); let expected_cursor1: Cursor = serde_json::from_str(&format!("\"{ts}|{u2}\"")).unwrap(); - let expected_page1 = ConversationsPage { + let expected_page1 = ThreadsPage { items: vec![ - ConversationItem { + ThreadItem { path: p3, head: head(u3), created_at: Some(ts.to_string()), updated_at: updated_page1.first().cloned().flatten(), }, - ConversationItem { + ThreadItem { path: p2, head: head(u2), created_at: Some(ts.to_string()), @@ -722,7 +723,7 @@ async fn test_stable_ordering_same_second_pagination() { }; assert_eq!(page1, expected_page1); - let page2 = get_conversations( + let page2 = get_threads( home, 2, page1.next_cursor.as_ref(), @@ -740,8 +741,8 @@ async fn test_stable_ordering_same_second_pagination() { .join(format!("rollout-2025-07-01T00-00-00-{u1}.jsonl")); let updated_page2: Vec> = page2.items.iter().map(|i| i.updated_at.clone()).collect(); - let expected_page2 = ConversationsPage { - items: vec![ConversationItem { + let expected_page2 = ThreadsPage { + items: vec![ThreadItem { path: p1, head: head(u1), created_at: Some(ts.to_string()), @@ -780,7 +781,7 @@ async fn test_source_filter_excludes_non_matching_sessions() { .unwrap(); let provider_filter = provider_vec(&[TEST_PROVIDER]); - let interactive_only = get_conversations( + let interactive_only = get_threads( home, 10, None, @@ -801,7 +802,7 @@ async fn test_source_filter_excludes_non_matching_sessions() { path.ends_with("rollout-2025-08-02T10-00-00-00000000-0000-0000-0000-00000000002a.jsonl") })); 
- let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, TEST_PROVIDER) + let all_sessions = get_threads(home, 10, None, NO_SOURCE_FILTER, None, TEST_PROVIDER) .await .unwrap(); let all_paths: Vec<_> = all_sessions @@ -855,7 +856,7 @@ async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<( let openai_id_str = openai_id.to_string(); let none_id_str = none_id.to_string(); let openai_filter = provider_vec(&["openai"]); - let openai_sessions = get_conversations( + let openai_sessions = get_threads( home, 10, None, @@ -880,7 +881,7 @@ async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<( assert!(openai_ids.contains(&none_id_str)); let beta_filter = provider_vec(&["beta"]); - let beta_sessions = get_conversations( + let beta_sessions = get_threads( home, 10, None, @@ -900,7 +901,7 @@ async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<( assert_eq!(beta_head, Some(beta_id_str.as_str())); let unknown_filter = provider_vec(&["unknown"]); - let unknown_sessions = get_conversations( + let unknown_sessions = get_threads( home, 10, None, @@ -911,7 +912,7 @@ async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<( .await?; assert!(unknown_sessions.items.is_empty()); - let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, "openai").await?; + let all_sessions = get_threads(home, 10, None, NO_SOURCE_FILTER, None, "openai").await?; assert_eq!(all_sessions.items.len(), 3); Ok(()) diff --git a/codex-rs/core/src/rollout/truncation.rs b/codex-rs/core/src/rollout/truncation.rs new file mode 100644 index 00000000000..b8127f0345b --- /dev/null +++ b/codex-rs/core/src/rollout/truncation.rs @@ -0,0 +1,195 @@ +//! Helpers for truncating rollouts based on "user turn" boundaries. +//! +//! In core, "user turns" are detected by scanning `ResponseItem::Message` items and +//! interpreting them via `event_mapping::parse_turn_item(...)`. 
+ +use crate::event_mapping; +use codex_protocol::items::TurnItem; +use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::RolloutItem; + +/// Return the indices of user message boundaries in a rollout. +/// +/// A user message boundary is a `RolloutItem::ResponseItem(ResponseItem::Message { .. })` +/// whose parsed turn item is `TurnItem::UserMessage`. +/// +/// Rollouts can contain `ThreadRolledBack` markers. Those markers indicate that the +/// last N user turns were removed from the effective thread history; we apply them here so +/// indexing uses the post-rollback history rather than the raw stream. +pub(crate) fn user_message_positions_in_rollout(items: &[RolloutItem]) -> Vec { + let mut user_positions = Vec::new(); + for (idx, item) in items.iter().enumerate() { + match item { + RolloutItem::ResponseItem(item @ ResponseItem::Message { .. }) + if matches!( + event_mapping::parse_turn_item(item), + Some(TurnItem::UserMessage(_)) + ) => + { + user_positions.push(idx); + } + RolloutItem::EventMsg(EventMsg::ThreadRolledBack(rollback)) => { + let num_turns = usize::try_from(rollback.num_turns).unwrap_or(usize::MAX); + let new_len = user_positions.len().saturating_sub(num_turns); + user_positions.truncate(new_len); + } + _ => {} + } + } + user_positions +} + +/// Return a prefix of `items` obtained by cutting strictly before the nth user message. +/// +/// The boundary index is 0-based from the start of `items` (so `n_from_start = 0` returns +/// a prefix that excludes the first user message and everything after it). +/// +/// If fewer than or equal to `n_from_start` user messages exist, this returns an empty +/// vector (out of range). 
+pub(crate) fn truncate_rollout_before_nth_user_message_from_start(
+    items: &[RolloutItem],
+    n_from_start: usize,
+) -> Vec<RolloutItem> {
+    let user_positions = user_message_positions_in_rollout(items);
+
+    // If fewer than or equal to n user messages exist, treat as empty (out of range).
+    if user_positions.len() <= n_from_start {
+        return Vec::new();
+    }
+
+    // Cut strictly before the nth user message (do not keep the nth itself).
+    let cut_idx = user_positions[n_from_start];
+    items[..cut_idx].to_vec()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::codex::make_session_and_context;
+    use assert_matches::assert_matches;
+    use codex_protocol::models::ContentItem;
+    use codex_protocol::models::ReasoningItemReasoningSummary;
+    use codex_protocol::protocol::ThreadRolledBackEvent;
+    use pretty_assertions::assert_eq;
+
+    fn user_msg(text: &str) -> ResponseItem {
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::OutputText {
+                text: text.to_string(),
+            }],
+        }
+    }
+
+    fn assistant_msg(text: &str) -> ResponseItem {
+        ResponseItem::Message {
+            id: None,
+            role: "assistant".to_string(),
+            content: vec![ContentItem::OutputText {
+                text: text.to_string(),
+            }],
+        }
+    }
+
+    #[test]
+    fn truncates_rollout_from_start_before_nth_user_only() {
+        let items = [
+            user_msg("u1"),
+            assistant_msg("a1"),
+            assistant_msg("a2"),
+            user_msg("u2"),
+            assistant_msg("a3"),
+            ResponseItem::Reasoning {
+                id: "r1".to_string(),
+                summary: vec![ReasoningItemReasoningSummary::SummaryText {
+                    text: "s".to_string(),
+                }],
+                content: None,
+                encrypted_content: None,
+            },
+            ResponseItem::FunctionCall {
+                id: None,
+                name: "tool".to_string(),
+                arguments: "{}".to_string(),
+                call_id: "c1".to_string(),
+            },
+            assistant_msg("a4"),
+        ];
+
+        let rollout: Vec<RolloutItem> = items
+            .iter()
+            .cloned()
+            .map(RolloutItem::ResponseItem)
+            .collect();
+
+        let truncated = truncate_rollout_before_nth_user_message_from_start(&rollout, 1);
+        let expected = vec![
+            RolloutItem::ResponseItem(items[0].clone()),
+            RolloutItem::ResponseItem(items[1].clone()),
+            RolloutItem::ResponseItem(items[2].clone()),
+        ];
+        assert_eq!(
+            serde_json::to_value(&truncated).unwrap(),
+            serde_json::to_value(&expected).unwrap()
+        );
+
+        let truncated2 = truncate_rollout_before_nth_user_message_from_start(&rollout, 2);
+        assert_matches!(truncated2.as_slice(), []);
+    }
+
+    #[test]
+    fn truncates_rollout_from_start_applies_thread_rollback_markers() {
+        let rollout_items = vec![
+            RolloutItem::ResponseItem(user_msg("u1")),
+            RolloutItem::ResponseItem(assistant_msg("a1")),
+            RolloutItem::ResponseItem(user_msg("u2")),
+            RolloutItem::ResponseItem(assistant_msg("a2")),
+            RolloutItem::EventMsg(EventMsg::ThreadRolledBack(ThreadRolledBackEvent {
+                num_turns: 1,
+            })),
+            RolloutItem::ResponseItem(user_msg("u3")),
+            RolloutItem::ResponseItem(assistant_msg("a3")),
+            RolloutItem::ResponseItem(user_msg("u4")),
+            RolloutItem::ResponseItem(assistant_msg("a4")),
+        ];
+
+        // Effective user history after applying rollback(1) is: u1, u3, u4.
+        // So n_from_start=2 should cut before u4 (not u3).
+ let truncated = truncate_rollout_before_nth_user_message_from_start(&rollout_items, 2); + let expected = rollout_items[..7].to_vec(); + assert_eq!( + serde_json::to_value(&truncated).unwrap(), + serde_json::to_value(&expected).unwrap() + ); + } + + #[tokio::test] + async fn ignores_session_prefix_messages_when_truncating_rollout_from_start() { + let (session, turn_context) = make_session_and_context().await; + let mut items = session.build_initial_context(&turn_context); + items.push(user_msg("feature request")); + items.push(assistant_msg("ack")); + items.push(user_msg("second question")); + items.push(assistant_msg("answer")); + + let rollout_items: Vec = items + .iter() + .cloned() + .map(RolloutItem::ResponseItem) + .collect(); + + let truncated = truncate_rollout_before_nth_user_message_from_start(&rollout_items, 1); + let expected: Vec = vec![ + RolloutItem::ResponseItem(items[0].clone()), + RolloutItem::ResponseItem(items[1].clone()), + RolloutItem::ResponseItem(items[2].clone()), + ]; + + assert_eq!( + serde_json::to_value(&truncated).unwrap(), + serde_json::to_value(&expected).unwrap() + ); + } +} diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index c3930b4f428..601a5a8b81e 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -277,7 +277,7 @@ mod tests { }; assert_eq!( - assess_patch_safety(&add_inside, AskForApproval::OnRequest, &policy, &cwd,), + assess_patch_safety(&add_inside, AskForApproval::OnRequest, &policy, &cwd), SafetyCheck::AutoApprove { sandbox_type: SandboxType::None, user_explicitly_approved: false, diff --git a/codex-rs/core/src/skills/assets/samples/skill-creator/SKILL.md b/codex-rs/core/src/skills/assets/samples/skill-creator/SKILL.md index 7b44b52b22d..60251f16a66 100644 --- a/codex-rs/core/src/skills/assets/samples/skill-creator/SKILL.md +++ b/codex-rs/core/src/skills/assets/samples/skill-creator/SKILL.md @@ -328,6 +328,8 @@ Write the YAML frontmatter with `name` and `description`: - 
Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to Codex. - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when Codex needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" +Ensure the frontmatter is valid YAML. Keep `name` and `description` as single-line scalars. If either could be interpreted as YAML syntax, wrap it in quotes. + Do not include any other fields in YAML frontmatter. ##### Body diff --git a/codex-rs/core/src/skills/loader.rs b/codex-rs/core/src/skills/loader.rs index bce13fbb057..a7461dcf97f 100644 --- a/codex-rs/core/src/skills/loader.rs +++ b/codex-rs/core/src/skills/loader.rs @@ -1,11 +1,12 @@ use crate::config::Config; -use crate::git_info::resolve_root_git_project_for_trust; +use crate::config_loader::ConfigLayerStack; use crate::skills::model::SkillError; use crate::skills::model::SkillLoadOutcome; use crate::skills::model::SkillMetadata; use crate::skills::system::system_cache_root_dir; +use codex_app_server_protocol::ConfigLayerSource; use codex_protocol::protocol::SkillScope; -use dunce::canonicalize as normalize_path; +use dunce::canonicalize as canonicalize_path; use serde::Deserialize; use std::collections::HashSet; use std::collections::VecDeque; @@ -32,11 +33,12 @@ struct SkillFrontmatterMetadata { const SKILLS_FILENAME: &str = "SKILL.md"; const SKILLS_DIR_NAME: &str = "skills"; -const REPO_ROOT_CONFIG_DIR_NAME: &str = ".codex"; -const ADMIN_SKILLS_ROOT: &str = "/etc/codex/skills"; const MAX_NAME_LEN: usize = 64; const MAX_DESCRIPTION_LEN: usize = 1024; const MAX_SHORT_DESCRIPTION_LEN: usize = MAX_DESCRIPTION_LEN; +// 
Traversal depth from the skills root. +const MAX_SCAN_DEPTH: usize = 6; +const MAX_SKILLS_DIRS_PER_ROOT: usize = 2000; #[derive(Debug)] enum SkillParseError { @@ -88,90 +90,85 @@ where .skills .retain(|skill| seen.insert(skill.name.clone())); - outcome - .skills - .sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.path.cmp(&b.path))); + fn scope_rank(scope: SkillScope) -> u8 { + // Higher-priority scopes first (matches dedupe priority order). + match scope { + SkillScope::Repo => 0, + SkillScope::User => 1, + SkillScope::System => 2, + SkillScope::Admin => 3, + } + } - outcome -} + outcome.skills.sort_by(|a, b| { + scope_rank(a.scope) + .cmp(&scope_rank(b.scope)) + .then_with(|| a.name.cmp(&b.name)) + .then_with(|| a.path.cmp(&b.path)) + }); -pub(crate) fn user_skills_root(codex_home: &Path) -> SkillRoot { - SkillRoot { - path: codex_home.join(SKILLS_DIR_NAME), - scope: SkillScope::User, - } + outcome } -pub(crate) fn system_skills_root(codex_home: &Path) -> SkillRoot { - SkillRoot { - path: system_cache_root_dir(codex_home), - scope: SkillScope::System, - } -} +fn skill_roots_from_layer_stack_inner(config_layer_stack: &ConfigLayerStack) -> Vec { + let mut roots = Vec::new(); -pub(crate) fn admin_skills_root() -> SkillRoot { - SkillRoot { - path: PathBuf::from(ADMIN_SKILLS_ROOT), - scope: SkillScope::Admin, - } -} + for layer in config_layer_stack.layers_high_to_low() { + let Some(config_folder) = layer.config_folder() else { + continue; + }; -pub(crate) fn repo_skills_root(cwd: &Path) -> Option { - let base = if cwd.is_dir() { cwd } else { cwd.parent()? 
}; - let base = normalize_path(base).unwrap_or_else(|_| base.to_path_buf()); - - let repo_root = - resolve_root_git_project_for_trust(&base).map(|root| normalize_path(&root).unwrap_or(root)); - - let scope = SkillScope::Repo; - if let Some(repo_root) = repo_root.as_deref() { - for dir in base.ancestors() { - let skills_root = dir.join(REPO_ROOT_CONFIG_DIR_NAME).join(SKILLS_DIR_NAME); - if skills_root.is_dir() { - return Some(SkillRoot { - path: skills_root, - scope, + match &layer.name { + ConfigLayerSource::Project { .. } => { + roots.push(SkillRoot { + path: config_folder.as_path().join(SKILLS_DIR_NAME), + scope: SkillScope::Repo, }); } + ConfigLayerSource::User { .. } => { + // `$CODEX_HOME/skills` (user-installed skills). + roots.push(SkillRoot { + path: config_folder.as_path().join(SKILLS_DIR_NAME), + scope: SkillScope::User, + }); - if dir == repo_root { - break; + // Embedded system skills are cached under `$CODEX_HOME/skills/.system` and are a + // special case (not a config layer). + roots.push(SkillRoot { + path: system_cache_root_dir(config_folder.as_path()), + scope: SkillScope::System, + }); + } + ConfigLayerSource::System { .. } => { + // The system config layer lives under `/etc/codex/` on Unix, so treat + // `/etc/codex/skills` as admin-scoped skills. + roots.push(SkillRoot { + path: config_folder.as_path().join(SKILLS_DIR_NAME), + scope: SkillScope::Admin, + }); } + ConfigLayerSource::Mdm { .. } + | ConfigLayerSource::SessionFlags + | ConfigLayerSource::LegacyManagedConfigTomlFromFile { .. 
} + | ConfigLayerSource::LegacyManagedConfigTomlFromMdm => {} } - return None; - } - - let skills_root = base.join(REPO_ROOT_CONFIG_DIR_NAME).join(SKILLS_DIR_NAME); - skills_root.is_dir().then_some(SkillRoot { - path: skills_root, - scope, - }) -} - -pub(crate) fn skill_roots_for_cwd(codex_home: &Path, cwd: &Path) -> Vec { - let mut roots = Vec::new(); - - if let Some(repo_root) = repo_skills_root(cwd) { - roots.push(repo_root); - } - - // Load order matters: we dedupe by name, keeping the first occurrence. - // Priority order: repo, user, system, then admin. - roots.push(user_skills_root(codex_home)); - roots.push(system_skills_root(codex_home)); - if cfg!(unix) { - roots.push(admin_skills_root()); } roots } fn skill_roots(config: &Config) -> Vec { - skill_roots_for_cwd(&config.codex_home, &config.cwd) + skill_roots_from_layer_stack_inner(&config.config_layer_stack) +} + +pub(crate) fn skill_roots_from_layer_stack( + config_layer_stack: &ConfigLayerStack, +) -> Vec { + skill_roots_from_layer_stack_inner(config_layer_stack) } fn discover_skills_under_root(root: &Path, scope: SkillScope, outcome: &mut SkillLoadOutcome) { - let Ok(root) = normalize_path(root) else { + let Ok(root) = canonicalize_path(root) else { return; }; @@ -179,8 +176,38 @@ fn discover_skills_under_root(root: &Path, scope: SkillScope, outcome: &mut Skil return; } - let mut queue: VecDeque = VecDeque::from([root]); - while let Some(dir) = queue.pop_front() { + fn enqueue_dir( + queue: &mut VecDeque<(PathBuf, usize)>, + visited_dirs: &mut HashSet, + truncated_by_dir_limit: &mut bool, + path: PathBuf, + depth: usize, + ) { + if depth > MAX_SCAN_DEPTH { + return; + } + if visited_dirs.len() >= MAX_SKILLS_DIRS_PER_ROOT { + *truncated_by_dir_limit = true; + return; + } + if visited_dirs.insert(path.clone()) { + queue.push_back((path, depth)); + } + } + + // Follow symlinks for user, admin, and repo skills. System skills are written by Codex itself. 
+ let follow_symlinks = matches!( + scope, + SkillScope::Repo | SkillScope::User | SkillScope::Admin + ); + + let mut visited_dirs: HashSet = HashSet::new(); + visited_dirs.insert(root.clone()); + + let mut queue: VecDeque<(PathBuf, usize)> = VecDeque::from([(root.clone(), 0)]); + let mut truncated_by_dir_limit = false; + + while let Some((dir, depth)) = queue.pop_front() { let entries = match fs::read_dir(&dir) { Ok(entries) => entries, Err(e) => { @@ -205,11 +232,64 @@ fn discover_skills_under_root(root: &Path, scope: SkillScope, outcome: &mut Skil }; if file_type.is_symlink() { + if !follow_symlinks { + continue; + } + + // Follow the symlink to determine what it points to. + let metadata = match fs::metadata(&path) { + Ok(metadata) => metadata, + Err(e) => { + error!( + "failed to stat skills entry {} (symlink): {e:#}", + path.display() + ); + continue; + } + }; + + if metadata.is_dir() { + let Ok(resolved_dir) = canonicalize_path(&path) else { + continue; + }; + enqueue_dir( + &mut queue, + &mut visited_dirs, + &mut truncated_by_dir_limit, + resolved_dir, + depth + 1, + ); + continue; + } + + if metadata.is_file() && file_name == SKILLS_FILENAME { + match parse_skill_file(&path, scope) { + Ok(skill) => outcome.skills.push(skill), + Err(err) => { + if scope != SkillScope::System { + outcome.errors.push(SkillError { + path, + message: err.to_string(), + }); + } + } + } + } + continue; } if file_type.is_dir() { - queue.push_back(path); + let Ok(resolved_dir) = canonicalize_path(&path) else { + continue; + }; + enqueue_dir( + &mut queue, + &mut visited_dirs, + &mut truncated_by_dir_limit, + resolved_dir, + depth + 1, + ); continue; } @@ -230,6 +310,14 @@ fn discover_skills_under_root(root: &Path, scope: SkillScope, outcome: &mut Skil } } } + + if truncated_by_dir_limit { + tracing::warn!( + "skills scan truncated after {} directories (root: {})", + MAX_SKILLS_DIRS_PER_ROOT, + root.display() + ); + } } fn parse_skill_file(path: &Path, scope: SkillScope) -> Result { 
@@ -259,7 +347,7 @@ fn parse_skill_file(path: &Path, scope: SkillScope) -> Result Option { mod tests { use super::*; use crate::config::ConfigBuilder; + use crate::config::ConfigOverrides; + use crate::config_loader::ConfigLayerEntry; + use crate::config_loader::ConfigLayerStack; + use crate::config_loader::ConfigRequirements; + use crate::config_loader::ConfigRequirementsToml; use codex_protocol::protocol::SkillScope; + use codex_utils_absolute_path::AbsolutePathBuf; use pretty_assertions::assert_eq; use std::path::Path; - use std::process::Command; use tempfile::TempDir; + use toml::Value as TomlValue; + + const REPO_ROOT_CONFIG_DIR_NAME: &str = ".codex"; async fn make_config(codex_home: &TempDir) -> Config { - let mut config = ConfigBuilder::default() + make_config_for_cwd(codex_home, codex_home.path().to_path_buf()).await + } + + async fn make_config_for_cwd(codex_home: &TempDir, cwd: PathBuf) -> Config { + let harness_overrides = ConfigOverrides { + cwd: Some(cwd), + ..Default::default() + }; + + ConfigBuilder::default() .codex_home(codex_home.path().to_path_buf()) + .harness_overrides(harness_overrides) .build() .await - .expect("defaults for test should always succeed"); + .expect("defaults for test should always succeed") + } + + fn mark_as_git_repo(dir: &Path) { + // Config/project-root discovery only checks for the presence of `.git` (file or dir), + // so we can avoid shelling out to `git init` in tests. 
+ fs::write(dir.join(".git"), "gitdir: fake\n").unwrap(); + } + + fn normalized(path: &Path) -> PathBuf { + canonicalize_path(path).unwrap_or_else(|_| path.to_path_buf()) + } + + #[test] + fn skill_roots_from_layer_stack_maps_user_to_user_and_system_cache_and_system_to_admin() + -> anyhow::Result<()> { + let tmp = tempfile::tempdir()?; + + let system_folder = tmp.path().join("etc/codex"); + let user_folder = tmp.path().join("home/codex"); + fs::create_dir_all(&system_folder)?; + fs::create_dir_all(&user_folder)?; + + // The file path doesn't need to exist; it's only used to derive the config folder. + let system_file = AbsolutePathBuf::from_absolute_path(system_folder.join("config.toml"))?; + let user_file = AbsolutePathBuf::from_absolute_path(user_folder.join("config.toml"))?; + + let layers = vec![ + ConfigLayerEntry::new( + ConfigLayerSource::System { file: system_file }, + TomlValue::Table(toml::map::Map::new()), + ), + ConfigLayerEntry::new( + ConfigLayerSource::User { file: user_file }, + TomlValue::Table(toml::map::Map::new()), + ), + ]; + let stack = ConfigLayerStack::new( + layers, + ConfigRequirements::default(), + ConfigRequirementsToml::default(), + )?; + + let got = skill_roots_from_layer_stack(&stack) + .into_iter() + .map(|root| (root.scope, root.path)) + .collect::>(); + + assert_eq!( + got, + vec![ + (SkillScope::User, user_folder.join("skills")), + ( + SkillScope::System, + user_folder.join("skills").join(".system") + ), + (SkillScope::Admin, system_folder.join("skills")), + ] + ); - config.cwd = codex_home.path().to_path_buf(); - config + Ok(()) } fn write_skill(codex_home: &TempDir, dir: &str, name: &str, description: &str) -> PathBuf { @@ -365,27 +528,264 @@ mod tests { path } + #[cfg(unix)] + fn symlink_dir(target: &Path, link: &Path) { + std::os::unix::fs::symlink(target, link).unwrap(); + } + + #[cfg(unix)] + fn symlink_file(target: &Path, link: &Path) { + std::os::unix::fs::symlink(target, link).unwrap(); + } + #[tokio::test] - async fn 
loads_valid_skill() { + #[cfg(unix)] + async fn loads_skills_via_symlinked_subdir_for_user_scope() { let codex_home = tempfile::tempdir().expect("tempdir"); - write_skill(&codex_home, "demo", "demo-skill", "does things\ncarefully"); + let shared = tempfile::tempdir().expect("tempdir"); + + let shared_skill_path = write_skill_at(shared.path(), "demo", "linked-skill", "from link"); + + fs::create_dir_all(codex_home.path().join("skills")).unwrap(); + symlink_dir(shared.path(), &codex_home.path().join("skills/shared")); + let cfg = make_config(&codex_home).await; + let outcome = load_skills(&cfg); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "linked-skill".to_string(), + description: "from link".to_string(), + short_description: None, + path: normalized(&shared_skill_path), + scope: SkillScope::User, + }] + ); + } + #[tokio::test] + #[cfg(unix)] + async fn loads_skills_via_symlinked_skill_file_for_user_scope() { + let codex_home = tempfile::tempdir().expect("tempdir"); + let shared = tempfile::tempdir().expect("tempdir"); + + let shared_skill_path = + write_skill_at(shared.path(), "demo", "linked-file-skill", "from link"); + + let skill_dir = codex_home.path().join("skills/demo"); + fs::create_dir_all(&skill_dir).unwrap(); + symlink_file(&shared_skill_path, &skill_dir.join(SKILLS_FILENAME)); + + let cfg = make_config(&codex_home).await; let outcome = load_skills(&cfg); + assert!( outcome.errors.is_empty(), "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - let skill = &outcome.skills[0]; - assert_eq!(skill.name, "demo-skill"); - assert_eq!(skill.description, "does things carefully"); - assert_eq!(skill.short_description, None); - let path_str = skill.path.to_string_lossy().replace('\\', "/"); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "linked-file-skill".to_string(), + description: "from link".to_string(), 
+ short_description: None, + path: normalized(&shared_skill_path), + scope: SkillScope::User, + }] + ); + } + + #[tokio::test] + #[cfg(unix)] + async fn does_not_loop_on_symlink_cycle_for_user_scope() { + let codex_home = tempfile::tempdir().expect("tempdir"); + + // Create a cycle: + // $CODEX_HOME/skills/cycle/loop -> $CODEX_HOME/skills/cycle + let cycle_dir = codex_home.path().join("skills/cycle"); + fs::create_dir_all(&cycle_dir).unwrap(); + symlink_dir(&cycle_dir, &cycle_dir.join("loop")); + + let skill_path = write_skill_at(&cycle_dir, "demo", "cycle-skill", "still loads"); + + let cfg = make_config(&codex_home).await; + let outcome = load_skills(&cfg); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "cycle-skill".to_string(), + description: "still loads".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::User, + }] + ); + } + + #[test] + #[cfg(unix)] + fn loads_skills_via_symlinked_subdir_for_admin_scope() { + let admin_root = tempfile::tempdir().expect("tempdir"); + let shared = tempfile::tempdir().expect("tempdir"); + + let shared_skill_path = + write_skill_at(shared.path(), "demo", "admin-linked-skill", "from link"); + fs::create_dir_all(admin_root.path()).unwrap(); + symlink_dir(shared.path(), &admin_root.path().join("shared")); + + let outcome = load_skills_from_roots([SkillRoot { + path: admin_root.path().to_path_buf(), + scope: SkillScope::Admin, + }]); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "admin-linked-skill".to_string(), + description: "from link".to_string(), + short_description: None, + path: normalized(&shared_skill_path), + scope: SkillScope::Admin, + }] + ); + } + + #[tokio::test] + #[cfg(unix)] + async fn loads_skills_via_symlinked_subdir_for_repo_scope() { + let codex_home = 
tempfile::tempdir().expect("tempdir"); + let repo_dir = tempfile::tempdir().expect("tempdir"); + mark_as_git_repo(repo_dir.path()); + let shared = tempfile::tempdir().expect("tempdir"); + + let linked_skill_path = + write_skill_at(shared.path(), "demo", "repo-linked-skill", "from link"); + let repo_skills_root = repo_dir + .path() + .join(REPO_ROOT_CONFIG_DIR_NAME) + .join(SKILLS_DIR_NAME); + fs::create_dir_all(&repo_skills_root).unwrap(); + symlink_dir(shared.path(), &repo_skills_root.join("shared")); + + let cfg = make_config_for_cwd(&codex_home, repo_dir.path().to_path_buf()).await; + let outcome = load_skills(&cfg); + assert!( - path_str.ends_with("skills/demo/SKILL.md"), - "unexpected path {path_str}" + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "repo-linked-skill".to_string(), + description: "from link".to_string(), + short_description: None, + path: normalized(&linked_skill_path), + scope: SkillScope::Repo, + }] + ); + } + + #[tokio::test] + #[cfg(unix)] + async fn system_scope_ignores_symlinked_subdir() { + let codex_home = tempfile::tempdir().expect("tempdir"); + let shared = tempfile::tempdir().expect("tempdir"); + + write_skill_at(shared.path(), "demo", "system-linked-skill", "from link"); + + let system_root = codex_home.path().join("skills/.system"); + fs::create_dir_all(&system_root).unwrap(); + symlink_dir(shared.path(), &system_root.join("shared")); + + let cfg = make_config(&codex_home).await; + let outcome = load_skills(&cfg); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!(outcome.skills.len(), 0); + } + + #[tokio::test] + async fn respects_max_scan_depth_for_user_scope() { + let codex_home = tempfile::tempdir().expect("tempdir"); + + let within_depth_path = write_skill( + &codex_home, + "d0/d1/d2/d3/d4/d5", + "within-depth-skill", + "loads", + ); + let _too_deep_path = write_skill( + 
&codex_home, + "d0/d1/d2/d3/d4/d5/d6", + "too-deep-skill", + "should not load", + ); + + let cfg = make_config(&codex_home).await; + let outcome = load_skills(&cfg); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "within-depth-skill".to_string(), + description: "loads".to_string(), + short_description: None, + path: normalized(&within_depth_path), + scope: SkillScope::User, + }] + ); + } + + #[tokio::test] + async fn loads_valid_skill() { + let codex_home = tempfile::tempdir().expect("tempdir"); + let skill_path = write_skill(&codex_home, "demo", "demo-skill", "does things\ncarefully"); + let cfg = make_config(&codex_home).await; + + let outcome = load_skills(&cfg); + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "demo-skill".to_string(), + description: "does things carefully".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::User, + }] ); } @@ -395,7 +795,8 @@ mod tests { let skill_dir = codex_home.path().join("skills/demo"); fs::create_dir_all(&skill_dir).unwrap(); let contents = "---\nname: demo-skill\ndescription: long description\nmetadata:\n short-description: short summary\n---\n\n# Body\n"; - fs::write(skill_dir.join(SKILLS_FILENAME), contents).unwrap(); + let skill_path = skill_dir.join(SKILLS_FILENAME); + fs::write(&skill_path, contents).unwrap(); let cfg = make_config(&codex_home).await; let outcome = load_skills(&cfg); @@ -404,10 +805,15 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); assert_eq!( - outcome.skills[0].short_description, - Some("short summary".to_string()) + outcome.skills, + vec![SkillMetadata { + name: "demo-skill".to_string(), + description: "long description".to_string(), + short_description: Some("short summary".to_string()), + 
path: normalized(&skill_path), + scope: SkillScope::User, + }] ); } @@ -493,22 +899,14 @@ mod tests { async fn loads_skills_from_repo_root() { let codex_home = tempfile::tempdir().expect("tempdir"); let repo_dir = tempfile::tempdir().expect("tempdir"); - - let status = Command::new("git") - .arg("init") - .current_dir(repo_dir.path()) - .status() - .expect("git init"); - assert!(status.success(), "git init failed"); + mark_as_git_repo(repo_dir.path()); let skills_root = repo_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) .join(SKILLS_DIR_NAME); - write_skill_at(&skills_root, "repo", "repo-skill", "from repo"); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = repo_dir.path().to_path_buf(); - let repo_root = normalize_path(&skills_root).unwrap_or_else(|_| skills_root.clone()); + let skill_path = write_skill_at(&skills_root, "repo", "repo-skill", "from repo"); + let cfg = make_config_for_cwd(&codex_home, repo_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -516,28 +914,28 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - let skill = &outcome.skills[0]; - assert_eq!(skill.name, "repo-skill"); - assert!(skill.path.starts_with(&repo_root)); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "repo-skill".to_string(), + description: "from repo".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::Repo, + }] + ); } #[tokio::test] - async fn loads_skills_from_nearest_codex_dir_under_repo_root() { + async fn loads_skills_from_all_codex_dirs_under_project_root() { let codex_home = tempfile::tempdir().expect("tempdir"); let repo_dir = tempfile::tempdir().expect("tempdir"); - - let status = Command::new("git") - .arg("init") - .current_dir(repo_dir.path()) - .status() - .expect("git init"); - assert!(status.success(), "git init failed"); + mark_as_git_repo(repo_dir.path()); let nested_dir = repo_dir.path().join("nested/inner"); 
fs::create_dir_all(&nested_dir).unwrap(); - write_skill_at( + let root_skill_path = write_skill_at( &repo_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) @@ -546,7 +944,7 @@ mod tests { "root-skill", "from root", ); - write_skill_at( + let nested_skill_path = write_skill_at( &repo_dir .path() .join("nested") @@ -557,8 +955,7 @@ mod tests { "from nested", ); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = nested_dir; + let cfg = make_config_for_cwd(&codex_home, nested_dir).await; let outcome = load_skills(&cfg); assert!( @@ -566,8 +963,25 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "nested-skill"); + assert_eq!( + outcome.skills, + vec![ + SkillMetadata { + name: "nested-skill".to_string(), + description: "from nested".to_string(), + short_description: None, + path: normalized(&nested_skill_path), + scope: SkillScope::Repo, + }, + SkillMetadata { + name: "root-skill".to_string(), + description: "from root".to_string(), + short_description: None, + path: normalized(&root_skill_path), + scope: SkillScope::Repo, + }, + ] + ); } #[tokio::test] @@ -575,7 +989,7 @@ mod tests { let codex_home = tempfile::tempdir().expect("tempdir"); let work_dir = tempfile::tempdir().expect("tempdir"); - write_skill_at( + let skill_path = write_skill_at( &work_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) @@ -585,8 +999,7 @@ mod tests { "from cwd", ); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = work_dir.path().to_path_buf(); + let cfg = make_config_for_cwd(&codex_home, work_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -594,25 +1007,26 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "local-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::Repo); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "local-skill".to_string(), + description: 
"from cwd".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::Repo, + }] + ); } #[tokio::test] async fn deduplicates_by_name_preferring_repo_over_user() { let codex_home = tempfile::tempdir().expect("tempdir"); let repo_dir = tempfile::tempdir().expect("tempdir"); + mark_as_git_repo(repo_dir.path()); - let status = Command::new("git") - .arg("init") - .current_dir(repo_dir.path()) - .status() - .expect("git init"); - assert!(status.success(), "git init failed"); - - write_skill(&codex_home, "user", "dupe-skill", "from user"); - write_skill_at( + let _user_skill_path = write_skill(&codex_home, "user", "dupe-skill", "from user"); + let repo_skill_path = write_skill_at( &repo_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) @@ -622,8 +1036,7 @@ mod tests { "from repo", ); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = repo_dir.path().to_path_buf(); + let cfg = make_config_for_cwd(&codex_home, repo_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -631,17 +1044,25 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "dupe-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::Repo); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from repo".to_string(), + short_description: None, + path: normalized(&repo_skill_path), + scope: SkillScope::Repo, + }] + ); } #[tokio::test] async fn loads_system_skills_when_present() { let codex_home = tempfile::tempdir().expect("tempdir"); - write_system_skill(&codex_home, "system", "dupe-skill", "from system"); - write_skill(&codex_home, "user", "dupe-skill", "from user"); + let _system_skill_path = + write_system_skill(&codex_home, "system", "dupe-skill", "from system"); + let user_skill_path = write_skill(&codex_home, "user", "dupe-skill", "from user"); let cfg = make_config(&codex_home).await; let outcome = 
load_skills(&cfg); @@ -650,9 +1071,16 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].description, "from user"); - assert_eq!(outcome.skills[0].scope, SkillScope::User); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from user".to_string(), + short_description: None, + path: normalized(&user_skill_path), + scope: SkillScope::User, + }] + ); } #[tokio::test] @@ -672,15 +1100,9 @@ mod tests { "from outer", ); - let status = Command::new("git") - .arg("init") - .current_dir(&repo_dir) - .status() - .expect("git init"); - assert!(status.success(), "git init failed"); + mark_as_git_repo(&repo_dir); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = repo_dir; + let cfg = make_config_for_cwd(&codex_home, repo_dir).await; let outcome = load_skills(&cfg); assert!( @@ -695,15 +1117,9 @@ mod tests { async fn loads_skills_when_cwd_is_file_in_repo() { let codex_home = tempfile::tempdir().expect("tempdir"); let repo_dir = tempfile::tempdir().expect("tempdir"); + mark_as_git_repo(repo_dir.path()); - let status = Command::new("git") - .arg("init") - .current_dir(repo_dir.path()) - .status() - .expect("git init"); - assert!(status.success(), "git init failed"); - - write_skill_at( + let skill_path = write_skill_at( &repo_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) @@ -715,8 +1131,7 @@ mod tests { let file_path = repo_dir.path().join("some-file.txt"); fs::write(&file_path, "contents").unwrap(); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = file_path; + let cfg = make_config_for_cwd(&codex_home, file_path).await; let outcome = load_skills(&cfg); assert!( @@ -724,9 +1139,16 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "repo-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::Repo); + assert_eq!( + outcome.skills, + 
vec![SkillMetadata { + name: "repo-skill".to_string(), + description: "from repo".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::Repo, + }] + ); } #[tokio::test] @@ -746,8 +1168,7 @@ mod tests { "from outer", ); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = nested_dir; + let cfg = make_config_for_cwd(&codex_home, nested_dir).await; let outcome = load_skills(&cfg); assert!( @@ -763,10 +1184,9 @@ mod tests { let codex_home = tempfile::tempdir().expect("tempdir"); let work_dir = tempfile::tempdir().expect("tempdir"); - write_system_skill(&codex_home, "system", "system-skill", "from system"); + let skill_path = write_system_skill(&codex_home, "system", "system-skill", "from system"); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = work_dir.path().to_path_buf(); + let cfg = make_config_for_cwd(&codex_home, work_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -774,9 +1194,16 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "system-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::System); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "system-skill".to_string(), + description: "from system".to_string(), + short_description: None, + path: normalized(&skill_path), + scope: SkillScope::System, + }] + ); } #[tokio::test] @@ -800,8 +1227,10 @@ mod tests { let system_dir = tempfile::tempdir().expect("tempdir"); let admin_dir = tempfile::tempdir().expect("tempdir"); - write_skill_at(system_dir.path(), "system", "dupe-skill", "from system"); - write_skill_at(admin_dir.path(), "admin", "dupe-skill", "from admin"); + let system_skill_path = + write_skill_at(system_dir.path(), "system", "dupe-skill", "from system"); + let _admin_skill_path = + write_skill_at(admin_dir.path(), "admin", "dupe-skill", "from admin"); let outcome = load_skills_from_roots([ SkillRoot { @@ 
-819,9 +1248,16 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "dupe-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::System); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from system".to_string(), + short_description: None, + path: normalized(&system_skill_path), + scope: SkillScope::System, + }] + ); } #[tokio::test] @@ -829,11 +1265,11 @@ mod tests { let codex_home = tempfile::tempdir().expect("tempdir"); let work_dir = tempfile::tempdir().expect("tempdir"); - write_skill(&codex_home, "user", "dupe-skill", "from user"); - write_system_skill(&codex_home, "system", "dupe-skill", "from system"); + let user_skill_path = write_skill(&codex_home, "user", "dupe-skill", "from user"); + let _system_skill_path = + write_system_skill(&codex_home, "system", "dupe-skill", "from system"); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = work_dir.path().to_path_buf(); + let cfg = make_config_for_cwd(&codex_home, work_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -841,24 +1277,25 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "dupe-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::User); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from user".to_string(), + short_description: None, + path: normalized(&user_skill_path), + scope: SkillScope::User, + }] + ); } #[tokio::test] async fn deduplicates_by_name_preferring_repo_over_system() { let codex_home = tempfile::tempdir().expect("tempdir"); let repo_dir = tempfile::tempdir().expect("tempdir"); + mark_as_git_repo(repo_dir.path()); - let status = Command::new("git") - .arg("init") - .current_dir(repo_dir.path()) - .status() - .expect("git init"); - assert!(status.success(), "git 
init failed"); - - write_skill_at( + let repo_skill_path = write_skill_at( &repo_dir .path() .join(REPO_ROOT_CONFIG_DIR_NAME) @@ -867,10 +1304,10 @@ mod tests { "dupe-skill", "from repo", ); - write_system_skill(&codex_home, "system", "dupe-skill", "from system"); + let _system_skill_path = + write_system_skill(&codex_home, "system", "dupe-skill", "from system"); - let mut cfg = make_config(&codex_home).await; - cfg.cwd = repo_dir.path().to_path_buf(); + let cfg = make_config_for_cwd(&codex_home, repo_dir.path().to_path_buf()).await; let outcome = load_skills(&cfg); assert!( @@ -878,8 +1315,66 @@ mod tests { "unexpected errors: {:?}", outcome.errors ); - assert_eq!(outcome.skills.len(), 1); - assert_eq!(outcome.skills[0].name, "dupe-skill"); - assert_eq!(outcome.skills[0].scope, SkillScope::Repo); + assert_eq!( + outcome.skills, + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from repo".to_string(), + short_description: None, + path: normalized(&repo_skill_path), + scope: SkillScope::Repo, + }] + ); + } + + #[tokio::test] + async fn deduplicates_by_name_preferring_nearest_project_codex_dir() { + let codex_home = tempfile::tempdir().expect("tempdir"); + let repo_dir = tempfile::tempdir().expect("tempdir"); + mark_as_git_repo(repo_dir.path()); + + let nested_dir = repo_dir.path().join("nested/inner"); + fs::create_dir_all(&nested_dir).unwrap(); + + let _root_skill_path = write_skill_at( + &repo_dir + .path() + .join(REPO_ROOT_CONFIG_DIR_NAME) + .join(SKILLS_DIR_NAME), + "root", + "dupe-skill", + "from root", + ); + let nested_skill_path = write_skill_at( + &repo_dir + .path() + .join("nested") + .join(REPO_ROOT_CONFIG_DIR_NAME) + .join(SKILLS_DIR_NAME), + "nested", + "dupe-skill", + "from nested", + ); + + let cfg = make_config_for_cwd(&codex_home, nested_dir).await; + let outcome = load_skills(&cfg); + + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + let expected_path = + 
canonicalize_path(&nested_skill_path).unwrap_or_else(|_| nested_skill_path.clone()); + assert_eq!( + vec![SkillMetadata { + name: "dupe-skill".to_string(), + description: "from nested".to_string(), + short_description: None, + path: expected_path, + scope: SkillScope::Repo, + }], + outcome.skills + ); } } diff --git a/codex-rs/core/src/skills/manager.rs b/codex-rs/core/src/skills/manager.rs index 8cc93d05bc2..bff928a5657 100644 --- a/codex-rs/core/src/skills/manager.rs +++ b/codex-rs/core/src/skills/manager.rs @@ -3,10 +3,17 @@ use std::path::Path; use std::path::PathBuf; use std::sync::RwLock; +use codex_utils_absolute_path::AbsolutePathBuf; +use toml::Value as TomlValue; + +use crate::config::Config; +use crate::config_loader::LoaderOverrides; +use crate::config_loader::load_config_layers_state; use crate::skills::SkillLoadOutcome; use crate::skills::loader::load_skills_from_roots; -use crate::skills::loader::skill_roots_for_cwd; +use crate::skills::loader::skill_roots_from_layer_stack; use crate::skills::system::install_system_skills; + pub struct SkillsManager { codex_home: PathBuf, cache_by_cwd: RwLock>, @@ -24,11 +31,32 @@ impl SkillsManager { } } - pub fn skills_for_cwd(&self, cwd: &Path) -> SkillLoadOutcome { - self.skills_for_cwd_with_options(cwd, false) + /// Load skills for an already-constructed [`Config`], avoiding any additional config-layer + /// loading. This also seeds the per-cwd cache for subsequent lookups. 
+ pub fn skills_for_config(&self, config: &Config) -> SkillLoadOutcome { + let cwd = &config.cwd; + let cached = match self.cache_by_cwd.read() { + Ok(cache) => cache.get(cwd).cloned(), + Err(err) => err.into_inner().get(cwd).cloned(), + }; + if let Some(outcome) = cached { + return outcome; + } + + let roots = skill_roots_from_layer_stack(&config.config_layer_stack); + let outcome = load_skills_from_roots(roots); + match self.cache_by_cwd.write() { + Ok(mut cache) => { + cache.insert(cwd.to_path_buf(), outcome.clone()); + } + Err(err) => { + err.into_inner().insert(cwd.to_path_buf(), outcome.clone()); + } + } + outcome } - pub fn skills_for_cwd_with_options(&self, cwd: &Path, force_reload: bool) -> SkillLoadOutcome { + pub async fn skills_for_cwd(&self, cwd: &Path, force_reload: bool) -> SkillLoadOutcome { let cached = match self.cache_by_cwd.read() { Ok(cache) => cache.get(cwd).cloned(), Err(err) => err.into_inner().get(cwd).cloned(), @@ -37,7 +65,41 @@ impl SkillsManager { return outcome; } - let roots = skill_roots_for_cwd(&self.codex_home, cwd); + let cwd_abs = match AbsolutePathBuf::try_from(cwd) { + Ok(cwd_abs) => cwd_abs, + Err(err) => { + return SkillLoadOutcome { + errors: vec![crate::skills::model::SkillError { + path: cwd.to_path_buf(), + message: err.to_string(), + }], + ..Default::default() + }; + } + }; + + let cli_overrides: Vec<(String, TomlValue)> = Vec::new(); + let config_layer_stack = match load_config_layers_state( + &self.codex_home, + Some(cwd_abs), + &cli_overrides, + LoaderOverrides::default(), + ) + .await + { + Ok(config_layer_stack) => config_layer_stack, + Err(err) => { + return SkillLoadOutcome { + errors: vec![crate::skills::model::SkillError { + path: cwd.to_path_buf(), + message: err.to_string(), + }], + ..Default::default() + }; + } + }; + + let roots = skill_roots_from_layer_stack(&config_layer_stack); let outcome = load_skills_from_roots(roots); match self.cache_by_cwd.write() { Ok(mut cache) => { @@ -50,3 +112,52 @@ impl 
SkillsManager { outcome } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::ConfigBuilder; + use crate::config::ConfigOverrides; + use pretty_assertions::assert_eq; + use std::fs; + use tempfile::TempDir; + + fn write_user_skill(codex_home: &TempDir, dir: &str, name: &str, description: &str) { + let skill_dir = codex_home.path().join("skills").join(dir); + fs::create_dir_all(&skill_dir).unwrap(); + let content = format!("---\nname: {name}\ndescription: {description}\n---\n\n# Body\n"); + fs::write(skill_dir.join("SKILL.md"), content).unwrap(); + } + + #[tokio::test] + async fn skills_for_config_seeds_cache_by_cwd() { + let codex_home = tempfile::tempdir().expect("tempdir"); + let cwd = tempfile::tempdir().expect("tempdir"); + + let cfg = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .harness_overrides(ConfigOverrides { + cwd: Some(cwd.path().to_path_buf()), + ..Default::default() + }) + .build() + .await + .expect("defaults for test should always succeed"); + + let skills_manager = SkillsManager::new(codex_home.path().to_path_buf()); + + write_user_skill(&codex_home, "a", "skill-a", "from a"); + let outcome1 = skills_manager.skills_for_config(&cfg); + assert!( + outcome1.skills.iter().any(|s| s.name == "skill-a"), + "expected skill-a to be discovered" + ); + + // Write a new skill after the first call; the second call should hit the cache and not + // reflect the new file. 
+ write_user_skill(&codex_home, "b", "skill-b", "from b"); + let outcome2 = skills_manager.skills_for_config(&cfg); + assert_eq!(outcome2.errors, outcome1.errors); + assert_eq!(outcome2.skills, outcome1.skills); + } +} diff --git a/codex-rs/core/src/skills/render.rs b/codex-rs/core/src/skills/render.rs index f767849b243..f998a51042e 100644 --- a/codex-rs/core/src/skills/render.rs +++ b/codex-rs/core/src/skills/render.rs @@ -7,7 +7,8 @@ pub fn render_skills_section(skills: &[SkillMetadata]) -> Option { let mut lines: Vec = Vec::new(); lines.push("## Skills".to_string()); - lines.push("These skills are discovered at startup from multiple local sources. Each entry includes a name, description, and file path so you can open the source for full instructions.".to_string()); + lines.push("A skill is a set of local instructions to follow that is stored in a `SKILL.md` file. Below is the list of skills that can be used. Each entry includes a name, description, and file path so you can open the source for full instructions when using a specific skill.".to_string()); + lines.push("### Available skills".to_string()); for skill in skills { let path_str = skill.path.to_string_lossy().replace('\\', "/"); @@ -16,22 +17,22 @@ pub fn render_skills_section(skills: &[SkillMetadata]) -> Option { lines.push(format!("- {name}: {description} (file: {path_str})")); } + lines.push("### How to use skills".to_string()); lines.push( - r###"- Discovery: Available skills are listed in project docs and may also appear in a runtime "## Skills" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths. -- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned. 
+ r###"- Discovery: The list above is the skills available in this session (name + description + file path). Skill bodies live on disk at the listed paths. +- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description shown above, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned. - Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback. - How to use a skill (progressive disclosure): 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow. 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything. 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks. 4) If `assets/` or templates exist, reuse them instead of recreating from scratch. -- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. If unsure, ask a brief clarification before proceeding. - Coordination and sequencing: - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them. - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why. - Context hygiene: - Keep context small: summarize long sections instead of pasting them; only load extra files when needed. - - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`. + - Avoid deep reference-chasing: prefer opening only files directly linked from `SKILL.md` unless you're blocked. - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice. 
- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."### .to_string(), diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index 63b66647e5b..2e4395956a5 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -2,14 +2,15 @@ use std::sync::Arc; use crate::AuthManager; use crate::RolloutRecorder; +use crate::agent::AgentControl; use crate::exec_policy::ExecPolicyManager; use crate::mcp_connection_manager::McpConnectionManager; use crate::models_manager::manager::ModelsManager; use crate::skills::SkillsManager; use crate::tools::sandboxing::ApprovalStore; -use crate::unified_exec::UnifiedExecSessionManager; +use crate::unified_exec::UnifiedExecProcessManager; use crate::user_notification::UserNotifier; -use codex_otel::otel_manager::OtelManager; +use codex_otel::OtelManager; use tokio::sync::Mutex; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; @@ -17,7 +18,7 @@ use tokio_util::sync::CancellationToken; pub(crate) struct SessionServices { pub(crate) mcp_connection_manager: Arc>, pub(crate) mcp_startup_cancellation_token: CancellationToken, - pub(crate) unified_exec_manager: UnifiedExecSessionManager, + pub(crate) unified_exec_manager: UnifiedExecProcessManager, pub(crate) notifier: UserNotifier, pub(crate) rollout: Mutex>, pub(crate) user_shell: Arc, @@ -28,4 +29,5 @@ pub(crate) struct SessionServices { pub(crate) otel_manager: OtelManager, pub(crate) tool_approvals: Mutex, pub(crate) skills_manager: Arc, + pub(crate) agent_control: AgentControl, } diff --git a/codex-rs/core/src/tasks/compact.rs b/codex-rs/core/src/tasks/compact.rs index a2a268fbb36..4b5f0d1cfb3 100644 --- a/codex-rs/core/src/tasks/compact.rs +++ b/codex-rs/core/src/tasks/compact.rs @@ -29,8 +29,16 @@ impl SessionTask for CompactTask { session.as_ref(), &ctx.client.get_provider(), ) { + let _ = session + 
.services + .otel_manager + .counter("codex.task.compact.remote", 1, &[]); crate::compact_remote::run_remote_compact_task(session, ctx).await } else { + let _ = session + .services + .otel_manager + .counter("codex.task.compact.local", 1, &[]); crate::compact::run_compact_task(session, ctx, input).await } diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index 4601f9d244d..a817d51cd6e 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ b/codex-rs/core/src/tasks/mod.rs @@ -159,7 +159,7 @@ impl Session { for task in self.take_all_running_tasks().await { self.handle_task_abort(task, reason.clone()).await; } - self.close_unified_exec_sessions().await; + self.close_unified_exec_processes().await; } pub async fn on_task_finished( @@ -168,7 +168,7 @@ impl Session { last_agent_message: Option, ) { let mut active = self.active_turn.lock().await; - let should_close_sessions = if let Some(at) = active.as_mut() + let should_close_processes = if let Some(at) = active.as_mut() && at.remove_task(&turn_context.sub_id) { *active = None; @@ -177,8 +177,8 @@ impl Session { false }; drop(active); - if should_close_sessions { - self.close_unified_exec_sessions().await; + if should_close_processes { + self.close_unified_exec_processes().await; } let event = EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }); self.send_event(turn_context.as_ref(), event).await; @@ -203,10 +203,10 @@ impl Session { } } - async fn close_unified_exec_sessions(&self) { + async fn close_unified_exec_processes(&self) { self.services .unified_exec_manager - .terminate_all_sessions() + .terminate_all_processes() .await; } diff --git a/codex-rs/core/src/tasks/review.rs b/codex-rs/core/src/tasks/review.rs index 00dbc51f405..56a1583e1a2 100644 --- a/codex-rs/core/src/tasks/review.rs +++ b/codex-rs/core/src/tasks/review.rs @@ -15,7 +15,7 @@ use tokio_util::sync::CancellationToken; use crate::codex::Session; use crate::codex::TurnContext; -use 
crate::codex_delegate::run_codex_conversation_one_shot; +use crate::codex_delegate::run_codex_thread_one_shot; use crate::review_format::format_review_findings_block; use crate::review_format::render_review_output_text; use crate::state::TaskKind; @@ -46,6 +46,12 @@ impl SessionTask for ReviewTask { input: Vec, cancellation_token: CancellationToken, ) -> Option { + let _ = session + .session + .services + .otel_manager + .counter("codex.task.review", 1, &[]); + // Start sub-codex conversation and get the receiver for events. let output = match start_review_conversation( session.clone(), @@ -85,14 +91,13 @@ async fn start_review_conversation( // re-enable blocked tools (web search, view image). sub_agent_config .features - .disable(crate::features::Feature::WebSearchRequest) - .disable(crate::features::Feature::ViewImageTool); + .disable(crate::features::Feature::WebSearchRequest); // Set explicit review rubric for the sub-agent sub_agent_config.base_instructions = Some(crate::REVIEW_PROMPT.to_string()); sub_agent_config.model = Some(config.review_model.clone()); - (run_codex_conversation_one_shot( + (run_codex_thread_one_shot( sub_agent_config, session.auth_manager(), session.models_manager(), diff --git a/codex-rs/core/src/tasks/undo.rs b/codex-rs/core/src/tasks/undo.rs index 5da7edd16fa..86232c094ce 100644 --- a/codex-rs/core/src/tasks/undo.rs +++ b/codex-rs/core/src/tasks/undo.rs @@ -38,6 +38,11 @@ impl SessionTask for UndoTask { _input: Vec, cancellation_token: CancellationToken, ) -> Option { + let _ = session + .session + .services + .otel_manager + .counter("codex.task.undo", 1, &[]); let sess = session.clone_session(); sess.send_event( ctx.as_ref(), diff --git a/codex-rs/core/src/tasks/user_shell.rs b/codex-rs/core/src/tasks/user_shell.rs index aec09514ca3..e76f70253ec 100644 --- a/codex-rs/core/src/tasks/user_shell.rs +++ b/codex-rs/core/src/tasks/user_shell.rs @@ -58,6 +58,12 @@ impl SessionTask for UserShellCommandTask { _input: Vec, cancellation_token: 
CancellationToken, ) -> Option { + let _ = session + .session + .services + .otel_manager + .counter("codex.task.user_shell", 1, &[]); + let event = EventMsg::TaskStarted(TaskStartedEvent { model_context_window: turn_context.client.get_model_context_window(), }); diff --git a/codex-rs/core/src/conversation_manager.rs b/codex-rs/core/src/thread_manager.rs similarity index 58% rename from codex-rs/core/src/conversation_manager.rs rename to codex-rs/core/src/thread_manager.rs index 5093e03c60f..2ef96f9ad33 100644 --- a/codex-rs/core/src/conversation_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -3,10 +3,11 @@ use crate::AuthManager; use crate::CodexAuth; #[cfg(any(test, feature = "test-support"))] use crate::ModelProviderInfo; +use crate::agent::AgentControl; use crate::codex::Codex; use crate::codex::CodexSpawnOk; use crate::codex::INITIAL_SUBMIT_ID; -use crate::codex_conversation::CodexConversation; +use crate::codex_thread::CodexThread; use crate::config::Config; use crate::error::CodexErr; use crate::error::Result as CodexResult; @@ -15,12 +16,12 @@ use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::SessionConfiguredEvent; use crate::rollout::RolloutRecorder; +use crate::rollout::truncation; use crate::skills::SkillsManager; -use codex_protocol::ConversationId; -use codex_protocol::items::TurnItem; -use codex_protocol::models::ResponseItem; +use codex_protocol::ThreadId; use codex_protocol::openai_models::ModelPreset; use codex_protocol::protocol::InitialHistory; +use codex_protocol::protocol::Op; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionSource; use std::collections::HashMap; @@ -30,35 +31,50 @@ use std::sync::Arc; use tempfile::TempDir; use tokio::sync::RwLock; -/// Represents a newly created Codex conversation, including the first event +/// Represents a newly created Codex thread (formerly called a conversation), including the first event /// (which is 
[`EventMsg::SessionConfigured`]). -pub struct NewConversation { - pub conversation_id: ConversationId, - pub conversation: Arc, +pub struct NewThread { + pub thread_id: ThreadId, + pub thread: Arc, pub session_configured: SessionConfiguredEvent, } -/// [`ConversationManager`] is responsible for creating conversations and -/// maintaining them in memory. -pub struct ConversationManager { - conversations: Arc>>>, +/// [`ThreadManager`] is responsible for creating threads and maintaining +/// them in memory. +pub struct ThreadManager { + state: Arc, + #[cfg(any(test, feature = "test-support"))] + _test_codex_home_guard: Option, +} + +/// Shared, `Arc`-owned state for [`ThreadManager`]. This `Arc` is required to have a single +/// `Arc` reference that can be downgraded to by `AgentControl` while preventing every single +/// function to require an `Arc<&Self>`. +pub(crate) struct ThreadManagerState { + threads: Arc>>>, auth_manager: Arc, models_manager: Arc, skills_manager: Arc, session_source: SessionSource, - #[cfg(any(test, feature = "test-support"))] - _test_codex_home_guard: Option, } -impl ConversationManager { - pub fn new(auth_manager: Arc, session_source: SessionSource) -> Self { - let skills_manager = Arc::new(SkillsManager::new(auth_manager.codex_home().to_path_buf())); +impl ThreadManager { + pub fn new( + codex_home: PathBuf, + auth_manager: Arc, + session_source: SessionSource, + ) -> Self { Self { - conversations: Arc::new(RwLock::new(HashMap::new())), - auth_manager: auth_manager.clone(), - session_source, - models_manager: Arc::new(ModelsManager::new(auth_manager)), - skills_manager, + state: Arc::new(ThreadManagerState { + threads: Arc::new(RwLock::new(HashMap::new())), + models_manager: Arc::new(ModelsManager::new( + codex_home.clone(), + auth_manager.clone(), + )), + skills_manager: Arc::new(SkillsManager::new(codex_home)), + auth_manager, + session_source, + }), #[cfg(any(test, feature = "test-support"))] _test_codex_home_guard: None, } @@ -83,213 
+99,199 @@ impl ConversationManager { provider: ModelProviderInfo, codex_home: PathBuf, ) -> Self { - let auth_manager = crate::AuthManager::from_auth_for_testing_with_home(auth, codex_home); - let skills_manager = Arc::new(SkillsManager::new(auth_manager.codex_home().to_path_buf())); + let auth_manager = AuthManager::from_auth_for_testing(auth); Self { - conversations: Arc::new(RwLock::new(HashMap::new())), - auth_manager: auth_manager.clone(), - session_source: SessionSource::Exec, - models_manager: Arc::new(ModelsManager::with_provider(auth_manager, provider)), - skills_manager, + state: Arc::new(ThreadManagerState { + threads: Arc::new(RwLock::new(HashMap::new())), + models_manager: Arc::new(ModelsManager::with_provider( + codex_home.clone(), + auth_manager.clone(), + provider, + )), + skills_manager: Arc::new(SkillsManager::new(codex_home)), + auth_manager, + session_source: SessionSource::Exec, + }), _test_codex_home_guard: None, } } pub fn session_source(&self) -> SessionSource { - self.session_source.clone() + self.state.session_source.clone() } pub fn skills_manager(&self) -> Arc { - self.skills_manager.clone() + self.state.skills_manager.clone() } - pub async fn new_conversation(&self, config: Config) -> CodexResult { - self.spawn_conversation( - config, - self.auth_manager.clone(), - self.models_manager.clone(), - ) - .await + pub fn get_models_manager(&self) -> Arc { + self.state.models_manager.clone() } - async fn spawn_conversation( - &self, - config: Config, - auth_manager: Arc, - models_manager: Arc, - ) -> CodexResult { - let CodexSpawnOk { - codex, - conversation_id, - } = Codex::spawn( - config, - auth_manager, - models_manager, - self.skills_manager.clone(), - InitialHistory::New, - self.session_source.clone(), - ) - .await?; - self.finalize_spawn(codex, conversation_id).await + pub async fn list_models(&self, config: &Config) -> Vec { + self.state.models_manager.list_models(config).await } - async fn finalize_spawn( - &self, - codex: Codex, - 
conversation_id: ConversationId, - ) -> CodexResult { - // The first event must be `SessionInitialized`. Validate and forward it - // to the caller so that they can display it in the conversation - // history. - let event = codex.next_event().await?; - let session_configured = match event { - Event { - id, - msg: EventMsg::SessionConfigured(session_configured), - } if id == INITIAL_SUBMIT_ID => session_configured, - _ => { - return Err(CodexErr::SessionConfiguredNotFirstEvent); - } - }; - - let conversation = Arc::new(CodexConversation::new( - codex, - session_configured.rollout_path.clone(), - )); - self.conversations - .write() - .await - .insert(conversation_id, conversation.clone()); + pub async fn list_thread_ids(&self) -> Vec { + self.state.threads.read().await.keys().copied().collect() + } - Ok(NewConversation { - conversation_id, - conversation, - session_configured, - }) + pub async fn get_thread(&self, thread_id: ThreadId) -> CodexResult> { + self.state.get_thread(thread_id).await } - pub async fn get_conversation( - &self, - conversation_id: ConversationId, - ) -> CodexResult> { - let conversations = self.conversations.read().await; - conversations - .get(&conversation_id) - .cloned() - .ok_or_else(|| CodexErr::ConversationNotFound(conversation_id)) + pub async fn start_thread(&self, config: Config) -> CodexResult { + self.state + .spawn_thread( + config, + InitialHistory::New, + Arc::clone(&self.state.auth_manager), + self.agent_control(), + ) + .await } - pub async fn resume_conversation_from_rollout( + pub async fn resume_thread_from_rollout( &self, config: Config, rollout_path: PathBuf, auth_manager: Arc, - ) -> CodexResult { + ) -> CodexResult { let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?; - self.resume_conversation_with_history(config, initial_history, auth_manager) + self.resume_thread_with_history(config, initial_history, auth_manager) .await } - pub async fn resume_conversation_with_history( + pub async fn 
resume_thread_with_history( &self, config: Config, initial_history: InitialHistory, auth_manager: Arc, - ) -> CodexResult { - let CodexSpawnOk { - codex, - conversation_id, - } = Codex::spawn( - config, - auth_manager, - self.models_manager.clone(), - self.skills_manager.clone(), - initial_history, - self.session_source.clone(), - ) - .await?; - self.finalize_spawn(codex, conversation_id).await + ) -> CodexResult { + self.state + .spawn_thread(config, initial_history, auth_manager, self.agent_control()) + .await } - /// Removes the conversation from the manager's internal map, though the - /// conversation is stored as `Arc`, it is possible that - /// other references to it exist elsewhere. Returns the conversation if the - /// conversation was found and removed. - pub async fn remove_conversation( - &self, - conversation_id: &ConversationId, - ) -> Option> { - self.conversations.write().await.remove(conversation_id) + /// Removes the thread from the manager's internal map, though the thread is stored + /// as `Arc`, it is possible that other references to it exist elsewhere. + /// Returns the thread if the thread was found and removed. + pub async fn remove_thread(&self, thread_id: &ThreadId) -> Option> { + self.state.threads.write().await.remove(thread_id) } - /// Fork an existing conversation by taking messages up to the given position - /// (not including the message at the given position) and starting a new - /// conversation with identical configuration (unless overridden by the - /// caller's `config`). The new conversation will have a fresh id. - pub async fn fork_conversation( + /// Fork an existing thread by taking messages up to the given position (not including + /// the message at the given position) and starting a new thread with identical + /// configuration (unless overridden by the caller's `config`). The new thread will have + /// a fresh id. 
+ pub async fn fork_thread( &self, nth_user_message: usize, config: Config, path: PathBuf, - ) -> CodexResult { - // Compute the prefix up to the cut point. + ) -> CodexResult { let history = RolloutRecorder::get_rollout_history(&path).await?; let history = truncate_before_nth_user_message(history, nth_user_message); + self.state + .spawn_thread( + config, + history, + Arc::clone(&self.state.auth_manager), + self.agent_control(), + ) + .await + } + + fn agent_control(&self) -> AgentControl { + AgentControl::new(Arc::downgrade(&self.state)) + } +} - // Spawn a new conversation with the computed initial history. - let auth_manager = self.auth_manager.clone(); +impl ThreadManagerState { + pub(crate) async fn get_thread(&self, thread_id: ThreadId) -> CodexResult> { + let threads = self.threads.read().await; + threads + .get(&thread_id) + .cloned() + .ok_or_else(|| CodexErr::ThreadNotFound(thread_id)) + } + + pub(crate) async fn send_op(&self, thread_id: ThreadId, op: Op) -> CodexResult { + self.get_thread(thread_id).await?.submit(op).await + } + + #[allow(dead_code)] // Used by upcoming multi-agent tooling. + pub(crate) async fn spawn_new_thread( + &self, + config: Config, + agent_control: AgentControl, + ) -> CodexResult { + self.spawn_thread( + config, + InitialHistory::New, + Arc::clone(&self.auth_manager), + agent_control, + ) + .await + } + + pub(crate) async fn spawn_thread( + &self, + config: Config, + initial_history: InitialHistory, + auth_manager: Arc, + agent_control: AgentControl, + ) -> CodexResult { let CodexSpawnOk { - codex, - conversation_id, + codex, thread_id, .. 
} = Codex::spawn( config, auth_manager, - self.models_manager.clone(), - self.skills_manager.clone(), - history, + Arc::clone(&self.models_manager), + Arc::clone(&self.skills_manager), + initial_history, self.session_source.clone(), + agent_control, ) .await?; - - self.finalize_spawn(codex, conversation_id).await + self.finalize_thread_spawn(codex, thread_id).await } - pub async fn list_models(&self, config: &Config) -> Vec { - self.models_manager.list_models(config).await - } + async fn finalize_thread_spawn( + &self, + codex: Codex, + thread_id: ThreadId, + ) -> CodexResult { + let event = codex.next_event().await?; + let session_configured = match event { + Event { + id, + msg: EventMsg::SessionConfigured(session_configured), + } if id == INITIAL_SUBMIT_ID => session_configured, + _ => { + return Err(CodexErr::SessionConfiguredNotFirstEvent); + } + }; - pub fn get_models_manager(&self) -> Arc { - self.models_manager.clone() + let thread = Arc::new(CodexThread::new( + codex, + session_configured.rollout_path.clone(), + )); + self.threads.write().await.insert(thread_id, thread.clone()); + + Ok(NewThread { + thread_id, + thread, + session_configured, + }) } } /// Return a prefix of `items` obtained by cutting strictly before the nth user message /// (0-based) and all items that follow it. fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> InitialHistory { - // Work directly on rollout items, and cut the vector at the nth user message input. let items: Vec = history.get_rollout_items(); - - // Find indices of user message inputs in rollout order. - let mut user_positions: Vec = Vec::new(); - for (idx, item) in items.iter().enumerate() { - if let RolloutItem::ResponseItem(item @ ResponseItem::Message { .. }) = item - && matches!( - crate::event_mapping::parse_turn_item(item), - Some(TurnItem::UserMessage(_)) - ) - { - user_positions.push(idx); - } - } - - // If fewer than or equal to n user messages exist, treat as empty (out of range). 
- if user_positions.len() <= n { - return InitialHistory::New; - } - - // Cut strictly before the nth user message (do not keep the nth itself). - let cut_idx = user_positions[n]; - let rolled: Vec = items.into_iter().take(cut_idx).collect(); + let rolled = truncation::truncate_rollout_before_nth_user_message_from_start(&items, n); if rolled.is_empty() { InitialHistory::New @@ -345,14 +347,13 @@ mod tests { }, ResponseItem::FunctionCall { id: None, + call_id: "c1".to_string(), name: "tool".to_string(), arguments: "{}".to_string(), - call_id: "c1".to_string(), }, assistant_msg("a4"), ]; - // Wrap as InitialHistory::Forked with response items only. let initial: Vec = items .iter() .cloned() diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index cdfc575cd9b..02aa50d7b61 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -305,7 +305,12 @@ impl ToolEmitter { // Normalize common rejection messages for exec tools so tests and // users see a clear, consistent phrase. let normalized = if msg == "rejected by user" { - "exec command rejected by user".to_string() + match self { + Self::Shell { .. } | Self::UnifiedExec { .. } => { + "exec command rejected by user".to_string() + } + Self::ApplyPatch { .. 
} => "patch rejected by user".to_string(), + } } else { msg }; diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs index 14a481f4eae..adf76e68251 100644 --- a/codex-rs/core/src/tools/handlers/apply_patch.rs +++ b/codex-rs/core/src/tools/handlers/apply_patch.rs @@ -26,11 +26,38 @@ use crate::tools::sandboxing::ToolCtx; use crate::tools::spec::ApplyPatchToolArgs; use crate::tools::spec::JsonSchema; use async_trait::async_trait; +use codex_apply_patch::ApplyPatchAction; +use codex_apply_patch::ApplyPatchFileChange; +use codex_utils_absolute_path::AbsolutePathBuf; pub struct ApplyPatchHandler; const APPLY_PATCH_LARK_GRAMMAR: &str = include_str!("tool_apply_patch.lark"); +fn file_paths_for_action(action: &ApplyPatchAction) -> Vec { + let mut keys = Vec::new(); + let cwd = action.cwd.as_path(); + + for (path, change) in action.changes() { + if let Some(key) = to_abs_path(cwd, path) { + keys.push(key); + } + + if let ApplyPatchFileChange::Update { move_path, .. 
} = change + && let Some(dest) = move_path + && let Some(key) = to_abs_path(cwd, dest) + { + keys.push(key); + } + } + + keys +} + +fn to_abs_path(cwd: &Path, path: &Path) -> Option { + AbsolutePathBuf::resolve_path_against_base(path, cwd).ok() +} + #[async_trait] impl ToolHandler for ApplyPatchHandler { fn kind(&self) -> ToolKind { @@ -81,9 +108,7 @@ impl ToolHandler for ApplyPatchHandler { let command = vec!["apply_patch".to_string(), patch_input.clone()]; match codex_apply_patch::maybe_parse_apply_patch_verified(&command, &cwd) { codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => { - match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes) - .await - { + match apply_patch::apply_patch(turn.as_ref(), changes).await { InternalApplyPatchInvocation::Output(item) => { let content = item?; Ok(ToolOutput::Function { @@ -93,10 +118,10 @@ impl ToolHandler for ApplyPatchHandler { }) } InternalApplyPatchInvocation::DelegateToExec(apply) => { - let emitter = ToolEmitter::apply_patch( - convert_apply_patch_to_protocol(&apply.action), - !apply.user_explicitly_approved_this_action, - ); + let changes = convert_apply_patch_to_protocol(&apply.action); + let file_paths = file_paths_for_action(&apply.action); + let emitter = + ToolEmitter::apply_patch(changes.clone(), apply.auto_approved); let event_ctx = ToolEventCtx::new( session.as_ref(), turn.as_ref(), @@ -106,10 +131,11 @@ impl ToolHandler for ApplyPatchHandler { emitter.begin(event_ctx).await; let req = ApplyPatchRequest { - patch: apply.action.patch.clone(), - cwd: apply.action.cwd.clone(), + action: apply.action, + file_paths, + changes, + exec_approval_requirement: apply.exec_approval_requirement, timeout_ms: None, - user_explicitly_approved: apply.user_explicitly_approved_this_action, codex_exe: turn.codex_linux_sandbox_exe.clone(), }; @@ -178,7 +204,7 @@ pub(crate) async fn intercept_apply_patch( turn, ) .await; - match apply_patch::apply_patch(session, turn, call_id, changes).await { 
+ match apply_patch::apply_patch(turn, changes).await { InternalApplyPatchInvocation::Output(item) => { let content = item?; Ok(Some(ToolOutput::Function { @@ -188,19 +214,19 @@ pub(crate) async fn intercept_apply_patch( })) } InternalApplyPatchInvocation::DelegateToExec(apply) => { - let emitter = ToolEmitter::apply_patch( - convert_apply_patch_to_protocol(&apply.action), - !apply.user_explicitly_approved_this_action, - ); + let changes = convert_apply_patch_to_protocol(&apply.action); + let approval_keys = file_paths_for_action(&apply.action); + let emitter = ToolEmitter::apply_patch(changes.clone(), apply.auto_approved); let event_ctx = ToolEventCtx::new(session, turn, call_id, tracker.as_ref().copied()); emitter.begin(event_ctx).await; let req = ApplyPatchRequest { - patch: apply.action.patch.clone(), - cwd: apply.action.cwd.clone(), + action: apply.action, + file_paths: approval_keys, + changes, + exec_approval_requirement: apply.exec_approval_requirement, timeout_ms, - user_explicitly_approved: apply.user_explicitly_approved_this_action, codex_exe: turn.codex_linux_sandbox_exe.clone(), }; @@ -342,3 +368,35 @@ It is important to remember: }, }) } + +#[cfg(test)] +mod tests { + use super::*; + use codex_apply_patch::MaybeApplyPatchVerified; + use pretty_assertions::assert_eq; + use tempfile::TempDir; + + #[test] + fn approval_keys_include_move_destination() { + let tmp = TempDir::new().expect("tmp"); + let cwd = tmp.path(); + std::fs::create_dir_all(cwd.join("old")).expect("create old dir"); + std::fs::create_dir_all(cwd.join("renamed/dir")).expect("create dest dir"); + std::fs::write(cwd.join("old/name.txt"), "old content\n").expect("write old file"); + let patch = r#"*** Begin Patch +*** Update File: old/name.txt +*** Move to: renamed/dir/name.txt +@@ +-old content ++new content +*** End Patch"#; + let argv = vec!["apply_patch".to_string(), patch.to_string()]; + let action = match codex_apply_patch::maybe_parse_apply_patch_verified(&argv, cwd) { + 
MaybeApplyPatchVerified::Body(action) => action, + other => panic!("expected patch body, got: {other:?}"), + }; + + let keys = file_paths_for_action(&action); + assert_eq!(keys.len(), 2); + } +} diff --git a/codex-rs/core/src/tools/handlers/list_dir.rs b/codex-rs/core/src/tools/handlers/list_dir.rs index 1c08243f729..ffeed98f7a3 100644 --- a/codex-rs/core/src/tools/handlers/list_dir.rs +++ b/codex-rs/core/src/tools/handlers/list_dir.rs @@ -125,6 +125,8 @@ async fn list_dir_slice( return Ok(Vec::new()); } + entries.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + let start_index = offset - 1; if start_index >= entries.len() { return Err(FunctionCallError::RespondToModel( @@ -135,11 +137,10 @@ async fn list_dir_slice( let remaining_entries = entries.len() - start_index; let capped_limit = limit.min(remaining_entries); let end_index = start_index + capped_limit; - let mut selected_entries = entries[start_index..end_index].to_vec(); - selected_entries.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + let selected_entries = &entries[start_index..end_index]; let mut formatted = Vec::with_capacity(selected_entries.len()); - for entry in &selected_entries { + for entry in selected_entries { formatted.push(format_entry_line(entry)); } @@ -273,6 +274,7 @@ impl From<&FileType> for DirEntryKind { #[cfg(test)] mod tests { use super::*; + use pretty_assertions::assert_eq; use tempfile::tempdir; #[tokio::test] @@ -404,6 +406,44 @@ mod tests { ); } + #[tokio::test] + async fn paginates_in_sorted_order() { + let temp = tempdir().expect("create tempdir"); + let dir_path = temp.path(); + + let dir_a = dir_path.join("a"); + let dir_b = dir_path.join("b"); + tokio::fs::create_dir(&dir_a).await.expect("create a"); + tokio::fs::create_dir(&dir_b).await.expect("create b"); + + tokio::fs::write(dir_a.join("a_child.txt"), b"a") + .await + .expect("write a child"); + tokio::fs::write(dir_b.join("b_child.txt"), b"b") + .await + .expect("write b child"); + + let first_page = 
list_dir_slice(dir_path, 1, 2, 2) + .await + .expect("list page one"); + assert_eq!( + first_page, + vec![ + "a/".to_string(), + " a_child.txt".to_string(), + "More than 2 entries found".to_string() + ] + ); + + let second_page = list_dir_slice(dir_path, 3, 2, 2) + .await + .expect("list page two"); + assert_eq!( + second_page, + vec!["b/".to_string(), " b_child.txt".to_string()] + ); + } + #[tokio::test] async fn handles_large_limit_without_overflow() { let temp = tempdir().expect("create tempdir"); @@ -450,7 +490,7 @@ mod tests { } #[tokio::test] - async fn bfs_truncation() -> anyhow::Result<()> { + async fn truncation_respects_sorted_order() -> anyhow::Result<()> { let temp = tempdir()?; let dir_path = temp.path(); let nested = dir_path.join("nested"); @@ -467,7 +507,7 @@ mod tests { vec![ "nested/".to_string(), " child.txt".to_string(), - "root.txt".to_string(), + " deeper/".to_string(), "More than 3 entries found".to_string() ] ); diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 00418d8a65e..8ae6b1a8de8 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -1,7 +1,6 @@ use crate::function_tool::FunctionCallError; use crate::is_safe_command::is_known_safe_command; use crate::protocol::EventMsg; -use crate::protocol::ExecCommandSource; use crate::protocol::TerminalInteractionEvent; use crate::sandboxing::SandboxPermissions; use crate::shell::Shell; @@ -9,16 +8,13 @@ use crate::shell::get_shell_by_model_provided_path; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; -use crate::tools::events::ToolEmitter; -use crate::tools::events::ToolEventCtx; -use crate::tools::events::ToolEventStage; use crate::tools::handlers::apply_patch::intercept_apply_patch; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use 
crate::unified_exec::ExecCommandRequest; use crate::unified_exec::UnifiedExecContext; +use crate::unified_exec::UnifiedExecProcessManager; use crate::unified_exec::UnifiedExecResponse; -use crate::unified_exec::UnifiedExecSessionManager; use crate::unified_exec::WriteStdinRequest; use async_trait::async_trait; use serde::Deserialize; @@ -116,7 +112,7 @@ impl ToolHandler for UnifiedExecHandler { } }; - let manager: &UnifiedExecSessionManager = &session.services.unified_exec_manager; + let manager: &UnifiedExecProcessManager = &session.services.unified_exec_manager; let context = UnifiedExecContext::new(session.clone(), turn.clone(), call_id.clone()); let response = match tool_name.as_str() { @@ -172,20 +168,6 @@ impl ToolHandler for UnifiedExecHandler { return Ok(output); } - let event_ctx = ToolEventCtx::new( - context.session.as_ref(), - context.turn.as_ref(), - &context.call_id, - None, - ); - let emitter = ToolEmitter::unified_exec( - &command, - cwd.clone(), - ExecCommandSource::UnifiedExecStartup, - Some(process_id.clone()), - ); - emitter.emit(event_ctx, ToolEventStage::Begin).await; - manager .exec_command( ExecCommandRequest { diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index 7853617238e..f0810916a55 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -17,6 +17,7 @@ use crate::tools::sandboxing::ToolCtx; use crate::tools::sandboxing::ToolError; use crate::tools::sandboxing::ToolRuntime; use crate::tools::sandboxing::default_exec_approval_requirement; +use codex_otel::ToolDecisionSource; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; @@ -45,8 +46,8 @@ impl ToolOrchestrator { let otel = turn_ctx.client.get_otel_manager(); let otel_tn = &tool_ctx.tool_name; let otel_ci = &tool_ctx.call_id; - let otel_user = codex_otel::otel_manager::ToolDecisionSource::User; - let otel_cfg = 
codex_otel::otel_manager::ToolDecisionSource::Config; + let otel_user = ToolDecisionSource::User; + let otel_cfg = ToolDecisionSource::Config; // 1) Approval let mut already_approved = false; diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index 26d04f578c5..7b9d0dccc5d 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -11,6 +11,7 @@ use crate::sandboxing::SandboxPermissions; use crate::sandboxing::execute_env; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ExecApprovalRequirement; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; @@ -18,30 +19,28 @@ use crate::tools::sandboxing::ToolCtx; use crate::tools::sandboxing::ToolError; use crate::tools::sandboxing::ToolRuntime; use crate::tools::sandboxing::with_cached_approval; +use codex_apply_patch::ApplyPatchAction; use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::FileChange; use codex_protocol::protocol::ReviewDecision; +use codex_utils_absolute_path::AbsolutePathBuf; use futures::future::BoxFuture; use std::collections::HashMap; use std::path::PathBuf; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct ApplyPatchRequest { - pub patch: String, - pub cwd: PathBuf, + pub action: ApplyPatchAction, + pub file_paths: Vec, + pub changes: std::collections::HashMap, + pub exec_approval_requirement: ExecApprovalRequirement, pub timeout_ms: Option, - pub user_explicitly_approved: bool, pub codex_exe: Option, } #[derive(Default)] pub struct ApplyPatchRuntime; -#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) struct ApprovalKey { - patch: String, - cwd: PathBuf, -} - impl ApplyPatchRuntime { pub fn new() -> Self { Self @@ -58,8 +57,8 @@ impl ApplyPatchRuntime { let program = 
exe.to_string_lossy().to_string(); Ok(CommandSpec { program, - args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()], - cwd: req.cwd.clone(), + args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.action.patch.clone()], + cwd: req.action.cwd.clone(), expiration: req.timeout_ms.into(), // Run apply_patch with a minimal environment for determinism and to avoid leaks. env: HashMap::new(), @@ -87,13 +86,10 @@ impl Sandboxable for ApplyPatchRuntime { } impl Approvable for ApplyPatchRuntime { - type ApprovalKey = ApprovalKey; + type ApprovalKey = AbsolutePathBuf; - fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey { - ApprovalKey { - patch: req.patch.clone(), - cwd: req.cwd.clone(), - } + fn approval_keys(&self, req: &ApplyPatchRequest) -> Vec { + req.file_paths.clone() } fn start_approval_async<'a>( @@ -101,31 +97,25 @@ impl Approvable for ApplyPatchRuntime { req: &'a ApplyPatchRequest, ctx: ApprovalCtx<'a>, ) -> BoxFuture<'a, ReviewDecision> { - let key = self.approval_key(req); let session = ctx.session; let turn = ctx.turn; let call_id = ctx.call_id.to_string(); - let cwd = req.cwd.clone(); let retry_reason = ctx.retry_reason.clone(); - let user_explicitly_approved = req.user_explicitly_approved; + let approval_keys = self.approval_keys(req); + let changes = req.changes.clone(); Box::pin(async move { - with_cached_approval(&session.services, key, move || async move { - if let Some(reason) = retry_reason { - session - .request_command_approval( - turn, - call_id, - vec!["apply_patch".to_string()], - cwd, - Some(reason), - None, - ) - .await - } else if user_explicitly_approved { - ReviewDecision::ApprovedForSession - } else { - ReviewDecision::Approved - } + if let Some(reason) = retry_reason { + let rx_approve = session + .request_patch_approval(turn, call_id, changes.clone(), Some(reason), None) + .await; + return rx_approve.await.unwrap_or_default(); + } + + with_cached_approval(&session.services, approval_keys, || async move { + let 
rx_approve = session + .request_patch_approval(turn, call_id, changes, None, None) + .await; + rx_approve.await.unwrap_or_default() }) .await }) @@ -134,6 +124,17 @@ impl Approvable for ApplyPatchRuntime { fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool { !matches!(policy, AskForApproval::Never) } + + // apply_patch approvals are decided upstream by assess_patch_safety. + // + // This override ensures the orchestrator runs the patch approval flow when required instead + // of falling back to the global exec approval policy. + fn exec_approval_requirement( + &self, + req: &ApplyPatchRequest, + ) -> Option { + Some(req.exec_approval_requirement.clone()) + } } impl ToolRuntime for ApplyPatchRuntime { diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index 528efad7409..49052bc06b9 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -74,12 +74,12 @@ impl Sandboxable for ShellRuntime { impl Approvable for ShellRuntime { type ApprovalKey = ApprovalKey; - fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey { - ApprovalKey { + fn approval_keys(&self, req: &ShellRequest) -> Vec { + vec![ApprovalKey { command: req.command.clone(), cwd: req.cwd.clone(), sandbox_permissions: req.sandbox_permissions, - } + }] } fn start_approval_async<'a>( @@ -87,7 +87,7 @@ impl Approvable for ShellRuntime { req: &'a ShellRequest, ctx: ApprovalCtx<'a>, ) -> BoxFuture<'a, ReviewDecision> { - let key = self.approval_key(req); + let keys = self.approval_keys(req); let command = req.command.clone(); let cwd = req.cwd.clone(); let reason = ctx @@ -98,7 +98,7 @@ impl Approvable for ShellRuntime { let turn = ctx.turn; let call_id = ctx.call_id.to_string(); Box::pin(async move { - with_cached_approval(&session.services, key, move || async move { + with_cached_approval(&session.services, keys, move || async move { session .request_command_approval( turn, diff --git 
a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index da1f518e8fb..47e9e5ca125 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -2,7 +2,7 @@ Runtime: unified exec Handles approval + sandbox orchestration for unified exec requests, delegating to -the session manager to spawn PTYs once an ExecEnv is prepared. +the process manager to spawn PTYs once an ExecEnv is prepared. */ use crate::error::CodexErr; use crate::error::SandboxErr; @@ -25,8 +25,8 @@ use crate::tools::sandboxing::ToolError; use crate::tools::sandboxing::ToolRuntime; use crate::tools::sandboxing::with_cached_approval; use crate::unified_exec::UnifiedExecError; -use crate::unified_exec::UnifiedExecSession; -use crate::unified_exec::UnifiedExecSessionManager; +use crate::unified_exec::UnifiedExecProcess; +use crate::unified_exec::UnifiedExecProcessManager; use codex_protocol::protocol::ReviewDecision; use futures::future::BoxFuture; use std::collections::HashMap; @@ -50,7 +50,7 @@ pub struct UnifiedExecApprovalKey { } pub struct UnifiedExecRuntime<'a> { - manager: &'a UnifiedExecSessionManager, + manager: &'a UnifiedExecProcessManager, } impl UnifiedExecRequest { @@ -74,7 +74,7 @@ impl UnifiedExecRequest { } impl<'a> UnifiedExecRuntime<'a> { - pub fn new(manager: &'a UnifiedExecSessionManager) -> Self { + pub fn new(manager: &'a UnifiedExecProcessManager) -> Self { Self { manager } } } @@ -92,12 +92,12 @@ impl Sandboxable for UnifiedExecRuntime<'_> { impl Approvable for UnifiedExecRuntime<'_> { type ApprovalKey = UnifiedExecApprovalKey; - fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey { - UnifiedExecApprovalKey { + fn approval_keys(&self, req: &UnifiedExecRequest) -> Vec { + vec![UnifiedExecApprovalKey { command: req.command.clone(), cwd: req.cwd.clone(), sandbox_permissions: req.sandbox_permissions, - } + }] } fn start_approval_async<'b>( @@ -105,7 +105,7 @@ 
impl Approvable for UnifiedExecRuntime<'_> { req: &'b UnifiedExecRequest, ctx: ApprovalCtx<'b>, ) -> BoxFuture<'b, ReviewDecision> { - let key = self.approval_key(req); + let keys = self.approval_keys(req); let session = ctx.session; let turn = ctx.turn; let call_id = ctx.call_id.to_string(); @@ -116,7 +116,7 @@ impl Approvable for UnifiedExecRuntime<'_> { .clone() .or_else(|| req.justification.clone()); Box::pin(async move { - with_cached_approval(&session.services, key, || async move { + with_cached_approval(&session.services, keys, || async move { session .request_command_approval( turn, @@ -158,13 +158,13 @@ impl Approvable for UnifiedExecRuntime<'_> { } } -impl<'a> ToolRuntime for UnifiedExecRuntime<'a> { +impl<'a> ToolRuntime for UnifiedExecRuntime<'a> { async fn run( &mut self, req: &UnifiedExecRequest, attempt: &SandboxAttempt<'_>, ctx: &ToolCtx<'_>, - ) -> Result { + ) -> Result { let base_command = &req.command; let session_shell = ctx.session.user_shell(); let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref()); diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index 14dda62a8a6..82af60e3d6f 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -49,28 +49,44 @@ impl ApprovalStore { } } +/// Takes a vector of approval keys and returns a ReviewDecision. +/// There will be one key in most cases, but apply_patch can modify multiple files at once. +/// +/// - If all keys are already approved for session, we skip prompting. +/// - If the user approves for session, we store the decision for each key individually +/// so future requests touching any subset can also skip prompting. 
pub(crate) async fn with_cached_approval( services: &SessionServices, - key: K, + keys: Vec, fetch: F, ) -> ReviewDecision where - K: Serialize + Clone, + K: Serialize, F: FnOnce() -> Fut, Fut: Future, { - { + // To be defensive here, don't bother with checking the cache if keys are empty. + if keys.is_empty() { + return fetch().await; + } + + let already_approved = { let store = services.tool_approvals.lock().await; - if let Some(decision) = store.get(&key) { - return decision; - } + keys.iter() + .all(|key| matches!(store.get(key), Some(ReviewDecision::ApprovedForSession))) + }; + + if already_approved { + return ReviewDecision::ApprovedForSession; } let decision = fetch().await; if matches!(decision, ReviewDecision::ApprovedForSession) { let mut store = services.tool_approvals.lock().await; - store.put(key, ReviewDecision::ApprovedForSession); + for key in keys { + store.put(key, ReviewDecision::ApprovedForSession); + } } decision @@ -161,7 +177,14 @@ pub(crate) enum SandboxOverride { pub(crate) trait Approvable { type ApprovalKey: Hash + Eq + Clone + Debug + Serialize; - fn approval_key(&self, req: &Req) -> Self::ApprovalKey; + // In most cases (shell, unified_exec), a request will have a single approval key. + // + // However, apply_patch needs session "approve once, don't ask again" semantics that + // apply to multiple atomic targets (e.g., apply_patch approves per file path). Returning + // a list of keys lets the runtime treat the request as approved-for-session only if + // *all* keys are already approved, while still caching approvals per-key so future + // requests touching a subset can be auto-approved. + fn approval_keys(&self, req: &Req) -> Vec; /// Some tools may request to skip the sandbox on the first attempt /// (e.g., when the request explicitly asks for escalated permissions). 
diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 0ac91755c22..846025d58e2 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -2,13 +2,13 @@ use crate::client_common::tools::ResponsesApiTool; use crate::client_common::tools::ToolSpec; use crate::features::Feature; use crate::features::Features; -use crate::models_manager::model_family::ModelFamily; use crate::tools::handlers::PLAN_TOOL; use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool; use crate::tools::handlers::apply_patch::create_apply_patch_json_tool; use crate::tools::registry::ToolRegistryBuilder; use codex_protocol::openai_models::ApplyPatchToolType; use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; use serde::Deserialize; use serde::Serialize; use serde_json::Value as JsonValue; @@ -21,24 +21,24 @@ pub(crate) struct ToolsConfig { pub shell_type: ConfigShellToolType, pub apply_patch_tool_type: Option, pub web_search_request: bool, - pub include_view_image_tool: bool, + pub web_search_cached: bool, pub experimental_supported_tools: Vec, } pub(crate) struct ToolsConfigParams<'a> { - pub(crate) model_family: &'a ModelFamily, + pub(crate) model_info: &'a ModelInfo, pub(crate) features: &'a Features, } impl ToolsConfig { pub fn new(params: &ToolsConfigParams) -> Self { let ToolsConfigParams { - model_family, + model_info, features, } = params; let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); let include_web_search_request = features.enabled(Feature::WebSearchRequest); - let include_view_image_tool = features.enabled(Feature::ViewImageTool); + let include_web_search_cached = features.enabled(Feature::WebSearchCached); let shell_type = if !features.enabled(Feature::ShellTool) { ConfigShellToolType::Disabled @@ -50,10 +50,10 @@ impl ToolsConfig { ConfigShellToolType::ShellCommand } } else { - model_family.shell_type + model_info.shell_type }; 
- let apply_patch_tool_type = match model_family.apply_patch_tool_type { + let apply_patch_tool_type = match model_info.apply_patch_tool_type { Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform), Some(ApplyPatchToolType::Function) => Some(ApplyPatchToolType::Function), None => { @@ -69,8 +69,8 @@ impl ToolsConfig { shell_type, apply_patch_tool_type, web_search_request: include_web_search_request, - include_view_image_tool, - experimental_supported_tools: model_family.experimental_supported_tools.clone(), + web_search_cached: include_web_search_cached, + experimental_supported_tools: model_info.experimental_supported_tools.clone(), } } } @@ -405,7 +405,7 @@ fn create_view_image_tool() -> ToolSpec { ToolSpec::Function(ResponsesApiTool { name: "view_image".to_string(), description: - "Attach a local image (by filesystem path) to the conversation context for this turn." + "Attach a local image (by filesystem path) to the thread context for this turn." .to_string(), strict: false, parameters: JsonSchema::Object { @@ -1093,14 +1093,19 @@ pub(crate) fn build_specs( builder.register_handler("test_sync_tool", test_sync_handler); } - if config.web_search_request { - builder.push_spec(ToolSpec::WebSearch {}); + // Prefer web_search_cached flag over web_search_request + if config.web_search_cached { + builder.push_spec(ToolSpec::WebSearch { + external_web_access: Some(false), + }); + } else if config.web_search_request { + builder.push_spec(ToolSpec::WebSearch { + external_web_access: Some(true), + }); } - if config.include_view_image_tool { - builder.push_spec_with_parallel_support(create_view_image_tool(), true); - builder.register_handler("view_image", view_image_handler); - } + builder.push_spec_with_parallel_support(create_view_image_tool(), true); + builder.register_handler("view_image", view_image_handler); if let Some(mcp_tools) = mcp_tools { let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); @@ -1137,7 +1142,7 @@ 
mod tests { match tool { ToolSpec::Function(ResponsesApiTool { name, .. }) => name, ToolSpec::LocalShell {} => "local_shell", - ToolSpec::WebSearch {} => "web_search", + ToolSpec::WebSearch { .. } => "web_search", ToolSpec::Freeform(FreeformTool { name, .. }) => name, } } @@ -1215,20 +1220,19 @@ mod tests { ToolSpec::Function(ResponsesApiTool { parameters, .. }) => { strip_descriptions_schema(parameters); } - ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {} + ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch { .. } => {} } } #[test] fn test_full_toolset_specs_for_gpt5_codex_unified_exec_web_search() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); - features.enable(Feature::ViewImageTool); let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); let (tools, _) = build_specs(&config, None).build(); @@ -1259,7 +1263,9 @@ mod tests { create_read_mcp_resource_tool(), PLAN_TOOL.clone(), create_apply_patch_freeform_tool(), - ToolSpec::WebSearch {}, + ToolSpec::WebSearch { + external_web_access: Some(true), + }, create_view_image_tool(), ] { expected.insert(tool_name(&spec).to_string(), spec); @@ -1282,9 +1288,9 @@ mod tests { fn assert_model_tools(model_slug: &str, features: &Features, expected_tools: &[&str]) { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline(model_slug, &config); + let model_info = ModelsManager::construct_model_info_offline(model_slug, &config); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features, }); let (tools, _) = 
build_specs(&tools_config, Some(HashMap::new())).build(); @@ -1292,6 +1298,51 @@ mod tests { assert_eq!(&tool_names, &expected_tools,); } + #[test] + fn web_search_cached_sets_external_web_access_false() { + let config = test_config(); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::WebSearchCached); + + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + features: &features, + }); + let (tools, _) = build_specs(&tools_config, None).build(); + + let tool = find_tool(&tools, "web_search"); + assert_eq!( + tool.spec, + ToolSpec::WebSearch { + external_web_access: Some(false), + } + ); + } + + #[test] + fn web_search_cached_takes_precedence_over_web_search_request() { + let config = test_config(); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::WebSearchCached); + features.enable(Feature::WebSearchRequest); + + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + features: &features, + }); + let (tools, _) = build_specs(&tools_config, None).build(); + + let tool = find_tool(&tools, "web_search"); + assert_eq!( + tool.spec, + ToolSpec::WebSearch { + external_web_access: Some(false), + } + ); + } + #[test] fn test_build_specs_gpt5_codex_default() { assert_model_tools( @@ -1475,12 +1526,12 @@ mod tests { #[test] fn test_build_specs_default_shell_present() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("o3", &config); + let model_info = ModelsManager::construct_model_info_offline("o3", &config); let mut features = Features::with_defaults(); features.enable(Feature::WebSearchRequest); features.enable(Feature::UnifiedExec); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: 
&model_info, features: &features, }); let (tools, _) = build_specs(&tools_config, Some(HashMap::new())).build(); @@ -1497,12 +1548,11 @@ mod tests { #[ignore] fn test_parallel_support_flags() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); - features.disable(Feature::ViewImageTool); features.enable(Feature::UnifiedExec); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); let (tools, _) = build_specs(&tools_config, None).build(); @@ -1515,14 +1565,12 @@ mod tests { } #[test] - fn test_test_model_family_includes_sync_tool() { + fn test_test_model_info_includes_sync_tool() { let config = test_config(); - let model_family = - ModelsManager::construct_model_family_offline("test-gpt-5-codex", &config); - let mut features = Features::with_defaults(); - features.disable(Feature::ViewImageTool); + let model_info = ModelsManager::construct_model_info_offline("test-gpt-5-codex", &config); + let features = Features::with_defaults(); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); let (tools, _) = build_specs(&tools_config, None).build(); @@ -1548,12 +1596,12 @@ mod tests { #[test] fn test_build_specs_mcp_tools_converted() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("o3", &config); + let model_info = ModelsManager::construct_model_info_offline("o3", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); let (tools, _) = 
build_specs( @@ -1643,11 +1691,11 @@ mod tests { #[test] fn test_build_specs_mcp_tools_sorted_by_name() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("o3", &config); + let model_info = ModelsManager::construct_model_info_offline("o3", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); @@ -1719,12 +1767,12 @@ mod tests { #[test] fn test_mcp_tool_property_missing_type_defaults_to_string() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); @@ -1776,12 +1824,12 @@ mod tests { #[test] fn test_mcp_tool_integer_normalized_to_number() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); @@ -1829,13 +1877,13 @@ mod tests { #[test] fn test_mcp_tool_array_without_items_gets_default_string_items() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let 
mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); features.enable(Feature::ApplyPatchFreeform); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); @@ -1886,12 +1934,12 @@ mod tests { #[test] fn test_mcp_tool_anyof_defaults_to_string() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); @@ -1998,12 +2046,12 @@ Examples of valid command strings: #[test] fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() { let config = test_config(); - let model_family = ModelsManager::construct_model_family_offline("gpt-5-codex", &config); + let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config); let mut features = Features::with_defaults(); features.enable(Feature::UnifiedExec); features.enable(Feature::WebSearchRequest); let tools_config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, + model_info: &model_info, features: &features, }); let (tools, _) = build_specs( diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 7cced9970a0..8150b994d00 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,7 +2,6 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. 
-use crate::config::Config; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::openai_models::TruncationMode; use codex_protocol::openai_models::TruncationPolicyConfig; @@ -47,27 +46,6 @@ impl TruncationPolicy { } } - pub fn new(config: &Config, truncation_policy: TruncationPolicy) -> Self { - let config_token_limit = config.tool_output_token_limit; - - match truncation_policy { - TruncationPolicy::Bytes(family_bytes) => { - if let Some(token_limit) = config_token_limit { - Self::Bytes(approx_bytes_for_tokens(token_limit)) - } else { - Self::Bytes(family_bytes) - } - } - TruncationPolicy::Tokens(family_tokens) => { - if let Some(token_limit) = config_token_limit { - Self::Tokens(token_limit) - } else { - Self::Tokens(family_tokens) - } - } - } - } - /// Returns a token budget derived from this policy. /// /// - For `Tokens`, this is the explicit token limit. @@ -313,7 +291,7 @@ pub(crate) fn approx_token_count(text: &str) -> usize { len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN } -fn approx_bytes_for_tokens(tokens: usize) -> usize { +pub(crate) fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } diff --git a/codex-rs/core/src/unified_exec/async_watcher.rs b/codex-rs/core/src/unified_exec/async_watcher.rs index 53b6b7c77d4..1fbb1e8f6d5 100644 --- a/codex-rs/core/src/unified_exec/async_watcher.rs +++ b/codex-rs/core/src/unified_exec/async_watcher.rs @@ -7,6 +7,8 @@ use tokio::time::Duration; use tokio::time::Instant; use tokio::time::Sleep; +use super::UnifiedExecContext; +use super::process::UnifiedExecProcess; use crate::codex::Session; use crate::codex::TurnContext; use crate::exec::ExecToolCallOutput; @@ -19,10 +21,7 @@ use crate::protocol::ExecOutputStream; use crate::tools::events::ToolEmitter; use crate::tools::events::ToolEventCtx; use crate::tools::events::ToolEventStage; - -use super::CommandTranscript; -use super::UnifiedExecContext; -use 
super::session::UnifiedExecSession; +use crate::unified_exec::head_tail_buffer::HeadTailBuffer; pub(crate) const TRAILING_OUTPUT_GRACE: Duration = Duration::from_millis(100); @@ -38,13 +37,13 @@ const UNIFIED_EXEC_OUTPUT_DELTA_MAX_BYTES: usize = 8192; /// shared transcript, and emits ExecCommandOutputDelta events on UTF‑8 /// boundaries. pub(crate) fn start_streaming_output( - session: &UnifiedExecSession, + process: &UnifiedExecProcess, context: &UnifiedExecContext, - transcript: Arc>, + transcript: Arc>, ) { - let mut receiver = session.output_receiver(); - let output_drained = session.output_drained_notify(); - let exit_token = session.cancellation_token(); + let mut receiver = process.output_receiver(); + let output_drained = process.output_drained_notify(); + let exit_token = process.cancellation_token(); let session_ref = Arc::clone(&context.session); let turn_ref = Arc::clone(&context.turn); @@ -105,24 +104,24 @@ pub(crate) fn start_streaming_output( /// single ExecCommandEnd event with the aggregated transcript. 
#[allow(clippy::too_many_arguments)] pub(crate) fn spawn_exit_watcher( - session: Arc, + process: Arc, session_ref: Arc, turn_ref: Arc, call_id: String, command: Vec, cwd: PathBuf, process_id: String, - transcript: Arc>, + transcript: Arc>, started_at: Instant, ) { - let exit_token = session.cancellation_token(); - let output_drained = session.output_drained_notify(); + let exit_token = process.cancellation_token(); + let output_drained = process.output_drained_notify(); tokio::spawn(async move { exit_token.cancelled().await; output_drained.notified().await; - let exit_code = session.exit_code().unwrap_or(-1); + let exit_code = process.exit_code().unwrap_or(-1); let duration = Instant::now().saturating_duration_since(started_at); emit_exec_end_for_unified_exec( session_ref, @@ -142,7 +141,7 @@ pub(crate) fn spawn_exit_watcher( async fn process_chunk( pending: &mut Vec, - transcript: &Arc>, + transcript: &Arc>, call_id: &str, session_ref: &Arc, turn_ref: &Arc, @@ -153,7 +152,7 @@ async fn process_chunk( while let Some(prefix) = split_valid_utf8_prefix(pending) { { let mut guard = transcript.lock().await; - guard.append(&prefix); + guard.push_chunk(prefix.to_vec()); } if *emitted_deltas >= MAX_EXEC_OUTPUT_DELTAS_PER_CALL { @@ -183,7 +182,7 @@ pub(crate) async fn emit_exec_end_for_unified_exec( command: Vec, cwd: PathBuf, process_id: Option, - transcript: Arc>, + transcript: Arc>, fallback_output: String, exit_code: i32, duration: Duration, @@ -240,15 +239,15 @@ fn split_valid_utf8_prefix_with_max(buffer: &mut Vec, max_bytes: usize) -> O } async fn resolve_aggregated_output( - transcript: &Arc>, + transcript: &Arc>, fallback: String, ) -> String { let guard = transcript.lock().await; - if guard.data.is_empty() { + if guard.retained_bytes() == 0 { return fallback; } - String::from_utf8_lossy(&guard.data).to_string() + String::from_utf8_lossy(&guard.to_bytes()).to_string() } #[cfg(test)] diff --git a/codex-rs/core/src/unified_exec/errors.rs 
b/codex-rs/core/src/unified_exec/errors.rs index 02031f22fab..d8df3892520 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -3,11 +3,11 @@ use thiserror::Error; #[derive(Debug, Error)] pub(crate) enum UnifiedExecError { - #[error("Failed to create unified exec session: {message}")] - CreateSession { message: String }, - // Called "session" in the model's training. - #[error("Unknown session id {process_id}")] - UnknownSessionId { process_id: String }, + #[error("Failed to create unified exec process: {message}")] + CreateProcess { message: String }, + // The model is trained on `session_id`, but internally we track a `process_id`. + #[error("Unknown process id {process_id}")] + UnknownProcessId { process_id: String }, #[error("failed to write to stdin")] WriteToStdin, #[error("missing command line for unified exec request")] @@ -20,8 +20,8 @@ pub(crate) enum UnifiedExecError { } impl UnifiedExecError { - pub(crate) fn create_session(message: String) -> Self { - Self::CreateSession { message } + pub(crate) fn create_process(message: String) -> Self { + Self::CreateProcess { message } } pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self { diff --git a/codex-rs/core/src/unified_exec/head_tail_buffer.rs b/codex-rs/core/src/unified_exec/head_tail_buffer.rs new file mode 100644 index 00000000000..85244660483 --- /dev/null +++ b/codex-rs/core/src/unified_exec/head_tail_buffer.rs @@ -0,0 +1,272 @@ +use crate::unified_exec::UNIFIED_EXEC_OUTPUT_MAX_BYTES; +use std::collections::VecDeque; + +/// A capped buffer that preserves a stable prefix ("head") and suffix ("tail"), +/// dropping the middle once it exceeds the configured maximum. The buffer is +/// symmetric meaning 50% of the capacity is allocated to the head and 50% is +/// allocated to the tail. 
+#[derive(Debug)] +pub(crate) struct HeadTailBuffer { + max_bytes: usize, + head_budget: usize, + tail_budget: usize, + head: VecDeque>, + tail: VecDeque>, + head_bytes: usize, + tail_bytes: usize, + omitted_bytes: usize, +} + +impl Default for HeadTailBuffer { + fn default() -> Self { + Self::new(UNIFIED_EXEC_OUTPUT_MAX_BYTES) + } +} + +impl HeadTailBuffer { + /// Create a new buffer that retains at most `max_bytes` of output. + /// + /// The retained output is split across a prefix ("head") and suffix ("tail") + /// budget, dropping bytes from the middle once the limit is exceeded. + pub(crate) fn new(max_bytes: usize) -> Self { + let head_budget = max_bytes / 2; + let tail_budget = max_bytes.saturating_sub(head_budget); + Self { + max_bytes, + head_budget, + tail_budget, + head: VecDeque::new(), + tail: VecDeque::new(), + head_bytes: 0, + tail_bytes: 0, + omitted_bytes: 0, + } + } + + // Used for tests. + #[allow(dead_code)] + /// Total bytes currently retained by the buffer (head + tail). + pub(crate) fn retained_bytes(&self) -> usize { + self.head_bytes.saturating_add(self.tail_bytes) + } + + // Used for tests. + #[allow(dead_code)] + /// Total bytes that were dropped from the middle due to the size cap. + pub(crate) fn omitted_bytes(&self) -> usize { + self.omitted_bytes + } + + /// Append a chunk of bytes to the buffer. + /// + /// Bytes are first added to the head until the head budget is full; any + /// remaining bytes are added to the tail, with older tail bytes being + /// dropped to preserve the tail budget. + pub(crate) fn push_chunk(&mut self, chunk: Vec) { + if self.max_bytes == 0 { + self.omitted_bytes = self.omitted_bytes.saturating_add(chunk.len()); + return; + } + + // Fill the head budget first, then keep a capped tail. 
+ if self.head_bytes < self.head_budget { + let remaining_head = self.head_budget.saturating_sub(self.head_bytes); + if chunk.len() <= remaining_head { + self.head_bytes = self.head_bytes.saturating_add(chunk.len()); + self.head.push_back(chunk); + return; + } + + // Split the chunk: part goes to head, remainder goes to tail. + let (head_part, tail_part) = chunk.split_at(remaining_head); + if !head_part.is_empty() { + self.head_bytes = self.head_bytes.saturating_add(head_part.len()); + self.head.push_back(head_part.to_vec()); + } + self.push_to_tail(tail_part.to_vec()); + return; + } + + self.push_to_tail(chunk); + } + + /// Snapshot the retained output as a list of chunks. + /// + /// The returned chunks are ordered as: head chunks first, then tail chunks. + /// Omitted bytes are not represented in the snapshot. + pub(crate) fn snapshot_chunks(&self) -> Vec> { + let mut out = Vec::new(); + out.extend(self.head.iter().cloned()); + out.extend(self.tail.iter().cloned()); + out + } + + /// Return the retained output as a single byte vector. + /// + /// The output is formed by concatenating head chunks, then tail chunks. + /// Omitted bytes are not represented in the returned value. + pub(crate) fn to_bytes(&self) -> Vec { + let mut out = Vec::with_capacity(self.retained_bytes()); + for chunk in self.head.iter() { + out.extend_from_slice(chunk); + } + for chunk in self.tail.iter() { + out.extend_from_slice(chunk); + } + out + } + + /// Drain all retained chunks from the buffer and reset its state. + /// + /// The drained chunks are returned in head-then-tail order. Omitted bytes + /// are discarded along with the retained content. 
+ pub(crate) fn drain_chunks(&mut self) -> Vec> { + let mut out: Vec> = self.head.drain(..).collect(); + out.extend(self.tail.drain(..)); + self.head_bytes = 0; + self.tail_bytes = 0; + self.omitted_bytes = 0; + out + } + + fn push_to_tail(&mut self, chunk: Vec) { + if self.tail_budget == 0 { + self.omitted_bytes = self.omitted_bytes.saturating_add(chunk.len()); + return; + } + + if chunk.len() >= self.tail_budget { + // This single chunk is larger than the whole tail budget. Keep only the last + // tail_budget bytes and drop everything else. + let start = chunk.len().saturating_sub(self.tail_budget); + let kept = chunk[start..].to_vec(); + let dropped = chunk.len().saturating_sub(kept.len()); + self.omitted_bytes = self + .omitted_bytes + .saturating_add(self.tail_bytes) + .saturating_add(dropped); + self.tail.clear(); + self.tail_bytes = kept.len(); + self.tail.push_back(kept); + return; + } + + self.tail_bytes = self.tail_bytes.saturating_add(chunk.len()); + self.tail.push_back(chunk); + self.trim_tail_to_budget(); + } + + fn trim_tail_to_budget(&mut self) { + let mut excess = self.tail_bytes.saturating_sub(self.tail_budget); + while excess > 0 { + match self.tail.front_mut() { + Some(front) if excess >= front.len() => { + excess -= front.len(); + self.tail_bytes = self.tail_bytes.saturating_sub(front.len()); + self.omitted_bytes = self.omitted_bytes.saturating_add(front.len()); + self.tail.pop_front(); + } + Some(front) => { + front.drain(..excess); + self.tail_bytes = self.tail_bytes.saturating_sub(excess); + self.omitted_bytes = self.omitted_bytes.saturating_add(excess); + break; + } + None => break, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::HeadTailBuffer; + + use pretty_assertions::assert_eq; + + #[test] + fn keeps_prefix_and_suffix_when_over_budget() { + let mut buf = HeadTailBuffer::new(10); + + buf.push_chunk(b"0123456789".to_vec()); + assert_eq!(buf.omitted_bytes(), 0); + + // Exceeds max by 2; we should keep head+tail and omit the 
middle. + buf.push_chunk(b"ab".to_vec()); + assert!(buf.omitted_bytes() > 0); + + let rendered = String::from_utf8_lossy(&buf.to_bytes()).to_string(); + assert!(rendered.starts_with("01234")); + assert!(rendered.ends_with("89ab")); + } + + #[test] + fn max_bytes_zero_drops_everything() { + let mut buf = HeadTailBuffer::new(0); + buf.push_chunk(b"abc".to_vec()); + + assert_eq!(buf.retained_bytes(), 0); + assert_eq!(buf.omitted_bytes(), 3); + assert_eq!(buf.to_bytes(), b"".to_vec()); + assert_eq!(buf.snapshot_chunks(), Vec::>::new()); + } + + #[test] + fn head_budget_zero_keeps_only_last_byte_in_tail() { + let mut buf = HeadTailBuffer::new(1); + buf.push_chunk(b"abc".to_vec()); + + assert_eq!(buf.retained_bytes(), 1); + assert_eq!(buf.omitted_bytes(), 2); + assert_eq!(buf.to_bytes(), b"c".to_vec()); + } + + #[test] + fn draining_resets_state() { + let mut buf = HeadTailBuffer::new(10); + buf.push_chunk(b"0123456789".to_vec()); + buf.push_chunk(b"ab".to_vec()); + + let drained = buf.drain_chunks(); + assert!(!drained.is_empty()); + + assert_eq!(buf.retained_bytes(), 0); + assert_eq!(buf.omitted_bytes(), 0); + assert_eq!(buf.to_bytes(), b"".to_vec()); + } + + #[test] + fn chunk_larger_than_tail_budget_keeps_only_tail_end() { + let mut buf = HeadTailBuffer::new(10); + buf.push_chunk(b"0123456789".to_vec()); + + // Tail budget is 5 bytes. This chunk should replace the tail and keep only its last 5 bytes. + buf.push_chunk(b"ABCDEFGHIJK".to_vec()); + + let out = String::from_utf8_lossy(&buf.to_bytes()).to_string(); + assert!(out.starts_with("01234")); + assert!(out.ends_with("GHIJK")); + assert!(buf.omitted_bytes() > 0); + } + + #[test] + fn fills_head_then_tail_across_multiple_chunks() { + let mut buf = HeadTailBuffer::new(10); + + // Fill the 5-byte head budget across multiple chunks. + buf.push_chunk(b"01".to_vec()); + buf.push_chunk(b"234".to_vec()); + assert_eq!(buf.to_bytes(), b"01234".to_vec()); + + // Then fill the 5-byte tail budget. 
+ buf.push_chunk(b"567".to_vec()); + buf.push_chunk(b"89".to_vec()); + assert_eq!(buf.to_bytes(), b"0123456789".to_vec()); + assert_eq!(buf.omitted_bytes(), 0); + + // One more byte causes the tail to drop its oldest byte. + buf.push_chunk(b"a".to_vec()); + assert_eq!(buf.to_bytes(), b"012346789a".to_vec()); + assert_eq!(buf.omitted_bytes(), 1); + } +} diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 2cb30e5aa39..ae10054079f 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -1,7 +1,7 @@ -//! Unified Exec: interactive PTY execution orchestrated with approvals + sandboxing. +//! Unified Exec: interactive process execution orchestrated with approvals + sandboxing. //! //! Responsibilities -//! - Manages interactive PTY sessions (create, reuse, buffer output with caps). +//! - Manages interactive processes (create, reuse, buffer output with caps). //! - Uses the shared ToolOrchestrator to handle approval, sandbox selection, and //! retry semantics in a single, descriptive flow. //! - Spawns the PTY from a sandbox‑transformed `ExecEnv`; on sandbox denial, @@ -9,17 +9,17 @@ //! - Uses the shared `is_likely_sandbox_denied` heuristic to keep denial messages //! consistent with other exec paths. //! -//! Flow at a glance (open session) +//! Flow at a glance (open process) //! 1) Build a small request `{ command, cwd }`. //! 2) Orchestrator: approval (bypass/cache/prompt) → select sandbox → run. //! 3) Runtime: transform `CommandSpec` → `ExecEnv` → spawn PTY. //! 4) If denial, orchestrator retries with `SandboxType::None`. -//! 5) Session is returned with streaming output + metadata. +//! 5) Process handle is returned with streaming output + metadata. //! -//! This keeps policy logic and user interaction centralized while the PTY/session +//! This keeps policy logic and user interaction centralized while the PTY/process //! concerns remain isolated here. 
The implementation is split between: -//! - `session.rs`: PTY session lifecycle + output buffering. -//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling. +//! - `process.rs`: PTY process lifecycle + output buffering. +//! - `process_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling. use std::collections::HashMap; use std::collections::HashSet; @@ -37,39 +37,22 @@ use crate::sandboxing::SandboxPermissions; mod async_watcher; mod errors; -mod session; -mod session_manager; +mod head_tail_buffer; +mod process; +mod process_manager; pub(crate) use errors::UnifiedExecError; -pub(crate) use session::UnifiedExecSession; +pub(crate) use process::UnifiedExecProcess; pub(crate) const MIN_YIELD_TIME_MS: u64 = 250; pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000; pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000; pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_TOKENS: usize = UNIFIED_EXEC_OUTPUT_MAX_BYTES / 4; -pub(crate) const MAX_UNIFIED_EXEC_SESSIONS: usize = 64; +pub(crate) const MAX_UNIFIED_EXEC_PROCESSES: usize = 64; -// Send a warning message to the models when it reaches this number of sessions. -pub(crate) const WARNING_UNIFIED_EXEC_SESSIONS: usize = 60; - -#[derive(Debug, Default)] -pub(crate) struct CommandTranscript { - pub data: Vec, -} - -impl CommandTranscript { - pub fn append(&mut self, bytes: &[u8]) { - self.data.extend_from_slice(bytes); - if self.data.len() > UNIFIED_EXEC_OUTPUT_MAX_BYTES { - let excess = self - .data - .len() - .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES); - self.data.drain(..excess); - } - } -} +// Send a warning message to the models when it reaches this number of processes. 
+pub(crate) const WARNING_UNIFIED_EXEC_PROCESSES: usize = 60; pub(crate) struct UnifiedExecContext { pub session: Arc, @@ -121,32 +104,32 @@ pub(crate) struct UnifiedExecResponse { } #[derive(Default)] -pub(crate) struct SessionStore { - sessions: HashMap, - reserved_sessions_id: HashSet, +pub(crate) struct ProcessStore { + processes: HashMap, + reserved_process_ids: HashSet, } -impl SessionStore { - fn remove(&mut self, session_id: &str) -> Option { - self.reserved_sessions_id.remove(session_id); - self.sessions.remove(session_id) +impl ProcessStore { + fn remove(&mut self, process_id: &str) -> Option { + self.reserved_process_ids.remove(process_id); + self.processes.remove(process_id) } } -pub(crate) struct UnifiedExecSessionManager { - session_store: Mutex, +pub(crate) struct UnifiedExecProcessManager { + process_store: Mutex, } -impl Default for UnifiedExecSessionManager { +impl Default for UnifiedExecProcessManager { fn default() -> Self { Self { - session_store: Mutex::new(SessionStore::default()), + process_store: Mutex::new(ProcessStore::default()), } } } -struct SessionEntry { - session: Arc, +struct ProcessEntry { + process: Arc, session_ref: Arc, turn_ref: Arc, call_id: String, @@ -173,6 +156,7 @@ pub(crate) fn generate_chunk_id() -> String { #[cfg(test)] #[cfg(unix)] mod tests { + use super::head_tail_buffer::HeadTailBuffer; use super::*; use crate::codex::Session; use crate::codex::TurnContext; @@ -185,8 +169,6 @@ mod tests { use std::sync::Arc; use tokio::time::Duration; - use super::session::OutputBufferState; - async fn test_session_and_turn() -> (Arc, Arc) { let (session, mut turn) = make_session_and_context().await; turn.approval_policy = AskForApproval::Never; @@ -245,21 +227,36 @@ mod tests { } #[test] - fn push_chunk_trims_only_excess_bytes() { - let mut buffer = OutputBufferState::default(); + fn push_chunk_preserves_prefix_and_suffix() { + let mut buffer = HeadTailBuffer::default(); buffer.push_chunk(vec![b'a'; 
UNIFIED_EXEC_OUTPUT_MAX_BYTES]); buffer.push_chunk(vec![b'b']); buffer.push_chunk(vec![b'c']); - assert_eq!(buffer.total_bytes, UNIFIED_EXEC_OUTPUT_MAX_BYTES); - let snapshot = buffer.snapshot(); - assert_eq!(snapshot.len(), 3); + assert_eq!(buffer.retained_bytes(), UNIFIED_EXEC_OUTPUT_MAX_BYTES); + let snapshot = buffer.snapshot_chunks(); + + let first = snapshot.first().expect("expected at least one chunk"); + assert_eq!(first.first(), Some(&b'a')); + assert!(snapshot.iter().any(|chunk| chunk.as_slice() == b"b")); assert_eq!( - snapshot.first().unwrap().len(), - UNIFIED_EXEC_OUTPUT_MAX_BYTES - 2 + snapshot + .last() + .expect("expected at least one chunk") + .as_slice(), + b"c" ); - assert_eq!(snapshot.get(2).unwrap(), &vec![b'c']); - assert_eq!(snapshot.get(1).unwrap(), &vec![b'b']); + } + + #[test] + fn head_tail_buffer_default_preserves_prefix_and_suffix() { + let mut buffer = HeadTailBuffer::default(); + buffer.push_chunk(vec![b'a'; UNIFIED_EXEC_OUTPUT_MAX_BYTES]); + buffer.push_chunk(b"bc".to_vec()); + + let rendered = buffer.to_bytes(); + assert_eq!(rendered.first(), Some(&b'a')); + assert!(rendered.ends_with(b"bc")); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -424,10 +421,10 @@ mod tests { session .services .unified_exec_manager - .session_store + .process_store .lock() .await - .sessions + .processes .is_empty() ); @@ -435,7 +432,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn reusing_completed_session_returns_unknown_session() -> anyhow::Result<()> { + async fn reusing_completed_process_returns_unknown_process() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); let (session, turn) = test_session_and_turn().await; @@ -453,23 +450,23 @@ mod tests { let err = write_stdin(&session, process_id, "", 100) .await - .expect_err("expected unknown session error"); + .expect_err("expected unknown process error"); match err { - UnifiedExecError::UnknownSessionId { process_id: err_id } => { + 
UnifiedExecError::UnknownProcessId { process_id: err_id } => { assert_eq!(err_id, process_id, "process id should match request"); } - other => panic!("expected UnknownSessionId, got {other:?}"), + other => panic!("expected UnknownProcessId, got {other:?}"), } assert!( session .services .unified_exec_manager - .session_store + .process_store .lock() .await - .sessions + .processes .is_empty() ); diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/process.rs similarity index 75% rename from codex-rs/core/src/unified_exec/session.rs rename to codex-rs/core/src/unified_exec/process.rs index 4973a1a6417..d52d1a6a9db 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/process.rs @@ -1,6 +1,5 @@ #![allow(clippy::module_inception)] -use std::collections::VecDeque; use std::sync::Arc; use tokio::sync::Mutex; use tokio::sync::Notify; @@ -19,54 +18,11 @@ use crate::truncate::formatted_truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; -use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES; use super::UNIFIED_EXEC_OUTPUT_MAX_TOKENS; use super::UnifiedExecError; +use super::head_tail_buffer::HeadTailBuffer; -#[derive(Debug, Default)] -pub(crate) struct OutputBufferState { - chunks: VecDeque>, - pub(crate) total_bytes: usize, -} - -impl OutputBufferState { - pub(super) fn push_chunk(&mut self, chunk: Vec) { - self.total_bytes = self.total_bytes.saturating_add(chunk.len()); - self.chunks.push_back(chunk); - - let mut excess = self - .total_bytes - .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES); - - while excess > 0 { - match self.chunks.front_mut() { - Some(front) if excess >= front.len() => { - excess -= front.len(); - self.total_bytes = self.total_bytes.saturating_sub(front.len()); - self.chunks.pop_front(); - } - Some(front) => { - front.drain(..excess); - self.total_bytes = self.total_bytes.saturating_sub(excess); - break; - } - None => break, - } - } - } - - pub(super) fn 
drain(&mut self) -> Vec> { - let drained: Vec> = self.chunks.drain(..).collect(); - self.total_bytes = 0; - drained - } - - pub(super) fn snapshot(&self) -> Vec> { - self.chunks.iter().cloned().collect() - } -} - -pub(crate) type OutputBuffer = Arc>; +pub(crate) type OutputBuffer = Arc>; pub(crate) struct OutputHandles { pub(crate) output_buffer: OutputBuffer, pub(crate) output_notify: Arc, @@ -74,8 +30,8 @@ pub(crate) struct OutputHandles { } #[derive(Debug)] -pub(crate) struct UnifiedExecSession { - session: ExecCommandSession, +pub(crate) struct UnifiedExecProcess { + process_handle: ExecCommandSession, output_buffer: OutputBuffer, output_notify: Arc, cancellation_token: CancellationToken, @@ -84,13 +40,13 @@ pub(crate) struct UnifiedExecSession { sandbox_type: SandboxType, } -impl UnifiedExecSession { +impl UnifiedExecProcess { pub(super) fn new( - session: ExecCommandSession, + process_handle: ExecCommandSession, initial_output_rx: tokio::sync::broadcast::Receiver>, sandbox_type: SandboxType, ) -> Self { - let output_buffer = Arc::new(Mutex::new(OutputBufferState::default())); + let output_buffer = Arc::new(Mutex::new(HeadTailBuffer::default())); let output_notify = Arc::new(Notify::new()); let cancellation_token = CancellationToken::new(); let output_drained = Arc::new(Notify::new()); @@ -113,7 +69,7 @@ impl UnifiedExecSession { }); Self { - session, + process_handle, output_buffer, output_notify, cancellation_token, @@ -124,7 +80,7 @@ impl UnifiedExecSession { } pub(super) fn writer_sender(&self) -> mpsc::Sender> { - self.session.writer_sender() + self.process_handle.writer_sender() } pub(super) fn output_handles(&self) -> OutputHandles { @@ -136,7 +92,7 @@ impl UnifiedExecSession { } pub(super) fn output_receiver(&self) -> tokio::sync::broadcast::Receiver> { - self.session.output_receiver() + self.process_handle.output_receiver() } pub(super) fn cancellation_token(&self) -> CancellationToken { @@ -148,22 +104,22 @@ impl UnifiedExecSession { } pub(super) fn 
has_exited(&self) -> bool { - self.session.has_exited() + self.process_handle.has_exited() } pub(super) fn exit_code(&self) -> Option { - self.session.exit_code() + self.process_handle.exit_code() } pub(super) fn terminate(&self) { - self.session.terminate(); + self.process_handle.terminate(); self.cancellation_token.cancel(); self.output_task.abort(); } async fn snapshot_output(&self) -> Vec> { let guard = self.output_buffer.lock().await; - guard.snapshot() + guard.snapshot_chunks() } pub(crate) fn sandbox_type(&self) -> SandboxType { @@ -208,7 +164,7 @@ impl UnifiedExecSession { TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), ); let message = if snippet.is_empty() { - format!("Session exited with code {exit_code}") + format!("Process exited with code {exit_code}") } else { snippet }; @@ -222,11 +178,11 @@ impl UnifiedExecSession { sandbox_type: SandboxType, ) -> Result { let SpawnedPty { - session, + session: process_handle, output_rx, mut exit_rx, } = spawned; - let managed = Self::new(session, output_rx, sandbox_type); + let managed = Self::new(process_handle, output_rx, sandbox_type); let exit_ready = matches!(exit_rx.try_recv(), Ok(_) | Err(TryRecvError::Closed)); @@ -261,7 +217,7 @@ impl UnifiedExecSession { } } -impl Drop for UnifiedExecSession { +impl Drop for UnifiedExecProcess { fn drop(&mut self) { self.terminate(); } diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/process_manager.rs similarity index 76% rename from codex-rs/core/src/unified_exec/session_manager.rs rename to codex-rs/core/src/unified_exec/process_manager.rs index c97820e4643..2e80156114f 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/process_manager.rs @@ -16,8 +16,12 @@ use crate::codex::TurnContext; use crate::exec_env::create_env; use crate::protocol::BackgroundEventEvent; use crate::protocol::EventMsg; +use crate::protocol::ExecCommandSource; use crate::sandboxing::ExecEnv; 
use crate::sandboxing::SandboxPermissions; +use crate::tools::events::ToolEmitter; +use crate::tools::events::ToolEventCtx; +use crate::tools::events::ToolEventStage; use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; @@ -25,29 +29,28 @@ use crate::tools::sandboxing::ToolCtx; use crate::truncate::TruncationPolicy; use crate::truncate::approx_token_count; use crate::truncate::formatted_truncate_text; - -use super::CommandTranscript; -use super::ExecCommandRequest; -use super::MAX_UNIFIED_EXEC_SESSIONS; -use super::SessionEntry; -use super::SessionStore; -use super::UnifiedExecContext; -use super::UnifiedExecError; -use super::UnifiedExecResponse; -use super::UnifiedExecSessionManager; -use super::WARNING_UNIFIED_EXEC_SESSIONS; -use super::WriteStdinRequest; -use super::async_watcher::emit_exec_end_for_unified_exec; -use super::async_watcher::spawn_exit_watcher; -use super::async_watcher::start_streaming_output; -use super::clamp_yield_time; -use super::generate_chunk_id; -use super::resolve_max_tokens; -use super::session::OutputBuffer; -use super::session::OutputHandles; -use super::session::UnifiedExecSession; - -const UNIFIED_EXEC_ENV: [(&str, &str); 8] = [ +use crate::unified_exec::ExecCommandRequest; +use crate::unified_exec::MAX_UNIFIED_EXEC_PROCESSES; +use crate::unified_exec::ProcessEntry; +use crate::unified_exec::ProcessStore; +use crate::unified_exec::UnifiedExecContext; +use crate::unified_exec::UnifiedExecError; +use crate::unified_exec::UnifiedExecProcessManager; +use crate::unified_exec::UnifiedExecResponse; +use crate::unified_exec::WARNING_UNIFIED_EXEC_PROCESSES; +use crate::unified_exec::WriteStdinRequest; +use crate::unified_exec::async_watcher::emit_exec_end_for_unified_exec; +use crate::unified_exec::async_watcher::spawn_exit_watcher; +use crate::unified_exec::async_watcher::start_streaming_output; 
+use crate::unified_exec::clamp_yield_time; +use crate::unified_exec::generate_chunk_id; +use crate::unified_exec::head_tail_buffer::HeadTailBuffer; +use crate::unified_exec::process::OutputBuffer; +use crate::unified_exec::process::OutputHandles; +use crate::unified_exec::process::UnifiedExecProcess; +use crate::unified_exec::resolve_max_tokens; + +const UNIFIED_EXEC_ENV: [(&str, &str); 9] = [ ("NO_COLOR", "1"), ("TERM", "dumb"), ("LANG", "C.UTF-8"), @@ -56,6 +59,7 @@ const UNIFIED_EXEC_ENV: [(&str, &str); 8] = [ ("COLORTERM", ""), ("PAGER", "cat"), ("GIT_PAGER", "cat"), + ("GH_PAGER", "cat"), ]; fn apply_unified_exec_env(mut env: HashMap) -> HashMap { @@ -65,7 +69,7 @@ fn apply_unified_exec_env(mut env: HashMap) -> HashMap>, output_buffer: OutputBuffer, output_notify: Arc, @@ -76,10 +80,10 @@ struct PreparedSessionHandles { process_id: String, } -impl UnifiedExecSessionManager { +impl UnifiedExecProcessManager { pub(crate) async fn allocate_process_id(&self) -> String { loop { - let mut store = self.session_store.lock().await; + let mut store = self.process_store.lock().await; let process_id = if !cfg!(test) && !cfg!(feature = "deterministic_process_ids") { // production mode → random @@ -87,7 +91,7 @@ impl UnifiedExecSessionManager { } else { // test or deterministic mode let next = store - .reserved_sessions_id + .reserved_process_ids .iter() .filter_map(|s| s.parse::().ok()) .max() @@ -97,17 +101,17 @@ impl UnifiedExecSessionManager { next.to_string() }; - if store.reserved_sessions_id.contains(&process_id) { + if store.reserved_process_ids.contains(&process_id) { continue; } - store.reserved_sessions_id.insert(process_id.clone()); + store.reserved_process_ids.insert(process_id.clone()); return process_id; } } pub(crate) async fn release_process_id(&self, process_id: &str) { - let mut store = self.session_store.lock().await; + let mut store = self.process_store.lock().await; store.remove(process_id); } @@ -121,7 +125,7 @@ impl UnifiedExecSessionManager { 
.clone() .unwrap_or_else(|| context.turn.cwd.clone()); - let session = self + let process = self .open_session_with_sandbox( &request.command, cwd.clone(), @@ -131,16 +135,30 @@ impl UnifiedExecSessionManager { ) .await; - let session = match session { - Ok(session) => Arc::new(session), + let process = match process { + Ok(process) => Arc::new(process), Err(err) => { self.release_process_id(&request.process_id).await; return Err(err); } }; - let transcript = Arc::new(tokio::sync::Mutex::new(CommandTranscript::default())); - start_streaming_output(&session, context, Arc::clone(&transcript)); + let transcript = Arc::new(tokio::sync::Mutex::new(HeadTailBuffer::default())); + let event_ctx = ToolEventCtx::new( + context.session.as_ref(), + context.turn.as_ref(), + &context.call_id, + None, + ); + let emitter = ToolEmitter::unified_exec( + &request.command, + cwd.clone(), + ExecCommandSource::UnifiedExecStartup, + Some(request.process_id.clone()), + ); + emitter.emit(event_ctx, ToolEventStage::Begin).await; + + start_streaming_output(&process, context, Arc::clone(&transcript)); let max_tokens = resolve_max_tokens(request.max_output_tokens); let yield_time_ms = clamp_yield_time(request.yield_time_ms); @@ -153,7 +171,7 @@ impl UnifiedExecSessionManager { output_buffer, output_notify, cancellation_token, - } = session.output_handles(); + } = process.output_handles(); let deadline = start + Duration::from_millis(yield_time_ms); let collected = Self::collect_output_until_deadline( &output_buffer, @@ -166,8 +184,8 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); - let exit_code = session.exit_code(); - let has_exited = session.has_exited() || exit_code.is_some(); + let exit_code = process.exit_code(); + let has_exited = process.has_exited() || exit_code.is_some(); let chunk_id = generate_chunk_id(); let process_id = request.process_id.clone(); if 
has_exited { @@ -190,14 +208,14 @@ impl UnifiedExecSessionManager { .await; self.release_process_id(&request.process_id).await; - session.check_for_sandbox_denial_with_text(&text).await?; + process.check_for_sandbox_denial_with_text(&text).await?; } else { - // Long‑lived command: persist the session so write_stdin can reuse + // Long‑lived command: persist the process so write_stdin can reuse // it, and register a background watcher that will emit // ExecCommandEnd when the PTY eventually exits (even if no further // tool calls are made). - self.store_session( - Arc::clone(&session), + self.store_process( + Arc::clone(&process), context, &request.command, cwd.clone(), @@ -236,7 +254,7 @@ impl UnifiedExecSessionManager { ) -> Result { let process_id = request.process_id.to_string(); - let PreparedSessionHandles { + let PreparedProcessHandles { writer_tx, output_buffer, output_notify, @@ -246,7 +264,7 @@ impl UnifiedExecSessionManager { command: session_command, process_id, .. - } = self.prepare_session_handles(process_id.as_str()).await?; + } = self.prepare_process_handles(process_id.as_str()).await?; if !request.input.is_empty() { Self::send_input(&writer_tx, request.input.as_bytes()).await?; @@ -273,23 +291,23 @@ impl UnifiedExecSessionManager { let original_token_count = approx_token_count(&text); let chunk_id = generate_chunk_id(); - // After polling, refresh_session_state tells us whether the PTY is + // After polling, refresh_process_state tells us whether the PTY is // still alive or has exited and been removed from the store; we thread // that through so the handler can tag TerminalInteraction with an // appropriate process_id and exit_code. 
- let status = self.refresh_session_state(process_id.as_str()).await; + let status = self.refresh_process_state(process_id.as_str()).await; let (process_id, exit_code, event_call_id) = match status { - SessionStatus::Alive { + ProcessStatus::Alive { exit_code, call_id, process_id, } => (Some(process_id), exit_code, call_id), - SessionStatus::Exited { exit_code, entry } => { + ProcessStatus::Exited { exit_code, entry } => { let call_id = entry.call_id.clone(); (None, exit_code, call_id) } - SessionStatus::Unknown => { - return Err(UnifiedExecError::UnknownSessionId { + ProcessStatus::Unknown => { + return Err(UnifiedExecError::UnknownProcessId { process_id: request.process_id.to_string(), }); } @@ -314,25 +332,25 @@ impl UnifiedExecSessionManager { Ok(response) } - async fn refresh_session_state(&self, process_id: &str) -> SessionStatus { - let mut store = self.session_store.lock().await; - let Some(entry) = store.sessions.get(process_id) else { - return SessionStatus::Unknown; + async fn refresh_process_state(&self, process_id: &str) -> ProcessStatus { + let mut store = self.process_store.lock().await; + let Some(entry) = store.processes.get(process_id) else { + return ProcessStatus::Unknown; }; - let exit_code = entry.session.exit_code(); + let exit_code = entry.process.exit_code(); let process_id = entry.process_id.clone(); - if entry.session.has_exited() { + if entry.process.has_exited() { let Some(entry) = store.remove(&process_id) else { - return SessionStatus::Unknown; + return ProcessStatus::Unknown; }; - SessionStatus::Exited { + ProcessStatus::Exited { exit_code, entry: Box::new(entry), } } else { - SessionStatus::Alive { + ProcessStatus::Alive { exit_code, call_id: entry.call_id.clone(), process_id, @@ -340,16 +358,16 @@ impl UnifiedExecSessionManager { } } - async fn prepare_session_handles( + async fn prepare_process_handles( &self, process_id: &str, - ) -> Result { - let mut store = self.session_store.lock().await; + ) -> Result { + let mut store = 
self.process_store.lock().await; let entry = store - .sessions + .processes .get_mut(process_id) - .ok_or(UnifiedExecError::UnknownSessionId { + .ok_or(UnifiedExecError::UnknownProcessId { process_id: process_id.to_string(), })?; entry.last_used = Instant::now(); @@ -357,10 +375,10 @@ impl UnifiedExecSessionManager { output_buffer, output_notify, cancellation_token, - } = entry.session.output_handles(); + } = entry.process.output_handles(); - Ok(PreparedSessionHandles { - writer_tx: entry.session.writer_sender(), + Ok(PreparedProcessHandles { + writer_tx: entry.process.writer_sender(), output_buffer, output_notify, cancellation_token, @@ -382,18 +400,18 @@ impl UnifiedExecSessionManager { } #[allow(clippy::too_many_arguments)] - async fn store_session( + async fn store_process( &self, - session: Arc, + process: Arc, context: &UnifiedExecContext, command: &[String], cwd: PathBuf, started_at: Instant, process_id: String, - transcript: Arc>, + transcript: Arc>, ) { - let entry = SessionEntry { - session: Arc::clone(&session), + let entry = ProcessEntry { + process: Arc::clone(&process), session_ref: Arc::clone(&context.session), turn_ref: Arc::clone(&context.turn), call_id: context.call_id.clone(), @@ -401,25 +419,25 @@ impl UnifiedExecSessionManager { command: command.to_vec(), last_used: started_at, }; - let number_sessions = { - let mut store = self.session_store.lock().await; - Self::prune_sessions_if_needed(&mut store); - store.sessions.insert(process_id.clone(), entry); - store.sessions.len() + let number_processes = { + let mut store = self.process_store.lock().await; + Self::prune_processes_if_needed(&mut store); + store.processes.insert(process_id.clone(), entry); + store.processes.len() }; - if number_sessions >= WARNING_UNIFIED_EXEC_SESSIONS { + if number_processes >= WARNING_UNIFIED_EXEC_PROCESSES { context .session .record_model_warning( - format!("The maximum number of unified exec sessions you can keep open is {WARNING_UNIFIED_EXEC_SESSIONS} and you 
currently have {number_sessions} sessions open. Reuse older sessions or close them to prevent automatic pruning of old session"), + format!("The maximum number of unified exec processes you can keep open is {WARNING_UNIFIED_EXEC_PROCESSES} and you currently have {number_processes} processes open. Reuse older processes or close them to prevent automatic pruning of old processes"), &context.turn ) .await; }; spawn_exit_watcher( - Arc::clone(&session), + Arc::clone(&process), Arc::clone(&context.session), Arc::clone(&context.turn), context.call_id.clone(), @@ -453,7 +471,7 @@ impl UnifiedExecSessionManager { pub(crate) async fn open_session_with_exec_env( &self, env: &ExecEnv, - ) -> Result { + ) -> Result { let (program, args) = env .command .split_first() @@ -467,8 +485,8 @@ impl UnifiedExecSessionManager { &env.arg0, ) .await - .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; - UnifiedExecSession::from_spawned(spawned, env.sandbox).await + .map_err(|err| UnifiedExecError::create_process(err.to_string()))?; + UnifiedExecProcess::from_spawned(spawned, env.sandbox).await } pub(super) async fn open_session_with_sandbox( @@ -478,7 +496,7 @@ impl UnifiedExecSessionManager { sandbox_permissions: SandboxPermissions, justification: Option, context: &UnifiedExecContext, - ) -> Result { + ) -> Result { let env = apply_unified_exec_env(create_env(&context.turn.shell_environment_policy)); let features = context.session.features(); let mut orchestrator = ToolOrchestrator::new(); @@ -518,7 +536,7 @@ impl UnifiedExecSessionManager { context.turn.approval_policy, ) .await - .map_err(|e| UnifiedExecError::create_session(format!("{e:?}"))) + .map_err(|e| UnifiedExecError::create_process(format!("{e:?}"))) } pub(super) async fn collect_output_until_deadline( @@ -532,11 +550,11 @@ impl UnifiedExecSessionManager { let mut collected: Vec = Vec::with_capacity(4096); let mut exit_signal_received = cancellation_token.is_cancelled(); loop { - let drained_chunks; + let 
drained_chunks: Vec>; let mut wait_for_output = None; { let mut guard = output_buffer.lock().await; - drained_chunks = guard.drain(); + drained_chunks = guard.drain_chunks(); if drained_chunks.is_empty() { wait_for_output = Some(output_notify.notified()); } @@ -582,20 +600,20 @@ impl UnifiedExecSessionManager { collected } - fn prune_sessions_if_needed(store: &mut SessionStore) -> bool { - if store.sessions.len() < MAX_UNIFIED_EXEC_SESSIONS { + fn prune_processes_if_needed(store: &mut ProcessStore) -> bool { + if store.processes.len() < MAX_UNIFIED_EXEC_PROCESSES { return false; } let meta: Vec<(String, Instant, bool)> = store - .sessions + .processes .iter() - .map(|(id, entry)| (id.clone(), entry.last_used, entry.session.has_exited())) + .map(|(id, entry)| (id.clone(), entry.last_used, entry.process.has_exited())) .collect(); - if let Some(session_id) = Self::session_id_to_prune_from_meta(&meta) { - if let Some(entry) = store.remove(&session_id) { - entry.session.terminate(); + if let Some(process_id) = Self::process_id_to_prune_from_meta(&meta) { + if let Some(entry) = store.remove(&process_id) { + entry.process.terminate(); } return true; } @@ -604,7 +622,7 @@ impl UnifiedExecSessionManager { } // Centralized pruning policy so we can easily swap strategies later. 
- fn session_id_to_prune_from_meta(meta: &[(String, Instant, bool)]) -> Option { + fn process_id_to_prune_from_meta(meta: &[(String, Instant, bool)]) -> Option { if meta.is_empty() { return None; } @@ -632,22 +650,25 @@ impl UnifiedExecSessionManager { .map(|(process_id, _, _)| process_id) } - pub(crate) async fn terminate_all_sessions(&self) { - let entries: Vec = { - let mut sessions = self.session_store.lock().await; - let entries: Vec = - sessions.sessions.drain().map(|(_, entry)| entry).collect(); - sessions.reserved_sessions_id.clear(); + pub(crate) async fn terminate_all_processes(&self) { + let entries: Vec = { + let mut processes = self.process_store.lock().await; + let entries: Vec = processes + .processes + .drain() + .map(|(_, entry)| entry) + .collect(); + processes.reserved_process_ids.clear(); entries }; for entry in entries { - entry.session.terminate(); + entry.process.terminate(); } } } -enum SessionStatus { +enum ProcessStatus { Alive { exit_code: Option, call_id: String, @@ -655,7 +676,7 @@ enum SessionStatus { }, Exited { exit_code: Option, - entry: Box, + entry: Box, }, Unknown, } @@ -679,6 +700,7 @@ mod tests { ("COLORTERM".to_string(), String::new()), ("PAGER".to_string(), "cat".to_string()), ("GIT_PAGER".to_string(), "cat".to_string()), + ("GH_PAGER".to_string(), "cat".to_string()), ]); assert_eq!(env, expected); @@ -697,7 +719,7 @@ mod tests { } #[test] - fn pruning_prefers_exited_sessions_outside_recently_used() { + fn pruning_prefers_exited_processes_outside_recently_used() { let now = Instant::now(); let id = |n: i32| n.to_string(); let meta = vec![ @@ -713,7 +735,7 @@ mod tests { (id(10), now - Duration::from_secs(13), false), ]; - let candidate = UnifiedExecSessionManager::session_id_to_prune_from_meta(&meta); + let candidate = UnifiedExecProcessManager::process_id_to_prune_from_meta(&meta); assert_eq!(candidate, Some(id(2))); } @@ -735,13 +757,13 @@ mod tests { (id(10), now - Duration::from_secs(13), false), ]; - let candidate = 
UnifiedExecSessionManager::session_id_to_prune_from_meta(&meta); + let candidate = UnifiedExecProcessManager::process_id_to_prune_from_meta(&meta); assert_eq!(candidate, Some(id(1))); } #[test] - fn pruning_protects_recent_sessions_even_if_exited() { + fn pruning_protects_recent_processes_even_if_exited() { let now = Instant::now(); let id = |n: i32| n.to_string(); let meta = vec![ @@ -757,7 +779,7 @@ mod tests { (id(10), now - Duration::from_secs(13), true), ]; - let candidate = UnifiedExecSessionManager::session_id_to_prune_from_meta(&meta); + let candidate = UnifiedExecProcessManager::process_id_to_prune_from_meta(&meta); // (10) is exited but among the last 8; we should drop the LRU outside that set. assert_eq!(candidate, Some(id(1))); diff --git a/codex-rs/core/tests/chat_completions_payload.rs b/codex-rs/core/tests/chat_completions_payload.rs index 8af5df21695..54d13367a5f 100644 --- a/codex-rs/core/tests/chat_completions_payload.rs +++ b/codex-rs/core/tests/chat_completions_payload.rs @@ -13,8 +13,8 @@ use codex_core::Prompt; use codex_core::ResponseItem; use codex_core::WireApi; use codex_core::models_manager::manager::ModelsManager; -use codex_otel::otel_manager::OtelManager; -use codex_protocol::ConversationId; +use codex_otel::OtelManager; +use codex_protocol::ThreadId; use codex_protocol::models::ReasoningItemContent; use codex_protocol::protocol::SessionSource; use core_test_support::load_default_config_for_test; @@ -73,13 +73,13 @@ async fn run_request(input: Vec) -> Value { let summary = config.model_reasoning_summary; let config = Arc::new(config); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let model = ModelsManager::get_model_offline(config.model.as_deref()); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); + let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config); let otel_manager = OtelManager::new( conversation_id, 
model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), Some(AuthMode::ApiKey), @@ -91,7 +91,7 @@ async fn run_request(input: Vec) -> Value { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, otel_manager, provider, effort, diff --git a/codex-rs/core/tests/chat_completions_sse.rs b/codex-rs/core/tests/chat_completions_sse.rs index 4f05838279a..65b1f229b3f 100644 --- a/codex-rs/core/tests/chat_completions_sse.rs +++ b/codex-rs/core/tests/chat_completions_sse.rs @@ -12,8 +12,8 @@ use codex_core::ResponseEvent; use codex_core::ResponseItem; use codex_core::WireApi; use codex_core::models_manager::manager::ModelsManager; -use codex_otel::otel_manager::OtelManager; -use codex_protocol::ConversationId; +use codex_otel::OtelManager; +use codex_protocol::ThreadId; use codex_protocol::models::ReasoningItemContent; use codex_protocol::protocol::SessionSource; use core_test_support::load_default_config_for_test; @@ -72,15 +72,15 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec { let summary = config.model_reasoning_summary; let config = Arc::new(config); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let auth_mode = auth_manager.get_auth_mode(); let model = ModelsManager::get_model_offline(config.model.as_deref()); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); + let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config); let otel_manager = OtelManager::new( conversation_id, model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), auth_mode, @@ -92,7 +92,7 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, 
otel_manager, provider, effort, diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 9568ec2786c..45e8b0b46f3 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -2,7 +2,7 @@ use tempfile::TempDir; -use codex_core::CodexConversation; +use codex_core::CodexThread; use codex_core::config::Config; use codex_core::config::ConfigBuilder; use codex_core::config::ConfigOverrides; @@ -170,10 +170,7 @@ pub fn load_sse_fixture_with_id(path: impl AsRef, id: &str) -> .collect() } -pub async fn wait_for_event( - codex: &CodexConversation, - predicate: F, -) -> codex_core::protocol::EventMsg +pub async fn wait_for_event(codex: &CodexThread, predicate: F) -> codex_core::protocol::EventMsg where F: FnMut(&codex_core::protocol::EventMsg) -> bool, { @@ -181,7 +178,7 @@ where wait_for_event_with_timeout(codex, predicate, Duration::from_secs(1)).await } -pub async fn wait_for_event_match(codex: &CodexConversation, matcher: F) -> T +pub async fn wait_for_event_match(codex: &CodexThread, matcher: F) -> T where F: Fn(&codex_core::protocol::EventMsg) -> Option, { @@ -190,7 +187,7 @@ where } pub async fn wait_for_event_with_timeout( - codex: &CodexConversation, + codex: &CodexThread, mut predicate: F, wait_time: tokio::time::Duration, ) -> codex_core::protocol::EventMsg diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index a081a6bf12d..24d5dd8bc7a 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -5,9 +5,9 @@ use std::sync::Arc; use anyhow::Result; use codex_core::CodexAuth; -use codex_core::CodexConversation; -use codex_core::ConversationManager; +use codex_core::CodexThread; use codex_core::ModelProviderInfo; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::config::Config; use codex_core::features::Feature; @@ -138,33 +138,30 @@ impl TestCodexBuilder { resume_from: Option, 
) -> anyhow::Result { let auth = self.auth.clone(); - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( auth.clone(), config.model_provider.clone(), config.codex_home.clone(), ); + let thread_manager = Arc::new(thread_manager); let new_conversation = match resume_from { Some(path) => { let auth_manager = codex_core::AuthManager::from_auth_for_testing(auth); - conversation_manager - .resume_conversation_from_rollout(config.clone(), path, auth_manager) - .await? - } - None => { - conversation_manager - .new_conversation(config.clone()) + thread_manager + .resume_thread_from_rollout(config.clone(), path, auth_manager) .await? } + None => thread_manager.start_thread(config.clone()).await?, }; Ok(TestCodex { home, cwd, config, - codex: new_conversation.conversation, + codex: new_conversation.thread, session_configured: new_conversation.session_configured, - conversation_manager: Arc::new(conversation_manager), + thread_manager, }) } @@ -207,10 +204,10 @@ impl TestCodexBuilder { pub struct TestCodex { pub home: Arc, pub cwd: Arc, - pub codex: Arc, + pub codex: Arc, pub session_configured: SessionConfiguredEvent, pub config: Config, - pub conversation_manager: Arc, + pub thread_manager: Arc, } impl TestCodex { diff --git a/codex-rs/core/tests/responses_headers.rs b/codex-rs/core/tests/responses_headers.rs index 3b0ffd2983c..dbbf0d57dfd 100644 --- a/codex-rs/core/tests/responses_headers.rs +++ b/codex-rs/core/tests/responses_headers.rs @@ -11,8 +11,8 @@ use codex_core::ResponseEvent; use codex_core::ResponseItem; use codex_core::WireApi; use codex_core::models_manager::manager::ModelsManager; -use codex_otel::otel_manager::OtelManager; -use codex_protocol::ConversationId; +use codex_otel::OtelManager; +use codex_protocol::ThreadId; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::SubAgentSource; @@ 
-65,14 +65,14 @@ async fn responses_stream_includes_subagent_header_on_review() { config.model = Some(model.clone()); let config = Arc::new(config); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let auth_mode = AuthMode::ChatGPT; let session_source = SessionSource::SubAgent(SubAgentSource::Review); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); + let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config); let otel_manager = OtelManager::new( conversation_id, model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), Some(auth_mode), @@ -84,7 +84,7 @@ async fn responses_stream_includes_subagent_header_on_review() { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, otel_manager, provider, effort, @@ -159,15 +159,15 @@ async fn responses_stream_includes_subagent_header_on_other() { config.model = Some(model.clone()); let config = Arc::new(config); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let auth_mode = AuthMode::ChatGPT; let session_source = SessionSource::SubAgent(SubAgentSource::Other("my-task".to_string())); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); + let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config); let otel_manager = OtelManager::new( conversation_id, model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), Some(auth_mode), @@ -179,7 +179,7 @@ async fn responses_stream_includes_subagent_header_on_other() { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, otel_manager, provider, effort, @@ -212,7 +212,7 @@ async fn responses_stream_includes_subagent_header_on_other() { } #[tokio::test] -async fn 
responses_respects_model_family_overrides_from_config() { +async fn responses_respects_model_info_overrides_from_config() { core_test_support::skip_if_no_network!(); let server = responses::start_mock_server().await; @@ -251,16 +251,16 @@ async fn responses_respects_model_family_overrides_from_config() { let model = config.model.clone().expect("model configured"); let config = Arc::new(config); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let auth_mode = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")).get_auth_mode(); let session_source = SessionSource::SubAgent(SubAgentSource::Other("override-check".to_string())); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); + let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config); let otel_manager = OtelManager::new( conversation_id, model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), auth_mode, @@ -272,7 +272,7 @@ async fn responses_respects_model_family_overrides_from_config() { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, otel_manager, provider, effort, diff --git a/codex-rs/core/tests/suite/abort_tasks.rs b/codex-rs/core/tests/suite/abort_tasks.rs index 0d4a807a3c6..53d32e140d0 100644 --- a/codex-rs/core/tests/suite/abort_tasks.rs +++ b/codex-rs/core/tests/suite/abort_tasks.rs @@ -49,6 +49,7 @@ async fn interrupt_long_running_tool_emits_turn_aborted() { items: vec![UserInput::Text { text: "start sleep".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -101,6 +102,7 @@ async fn interrupt_tool_records_history_entries() { items: vec![UserInput::Text { text: "start history recording".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -117,6 +119,7 @@ async fn interrupt_tool_records_history_entries() { items: vec![UserInput::Text { text: 
"follow up".into(), }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index 74e38534bd6..ff8eca6f997 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -1561,6 +1561,123 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> { Ok(()) } +#[tokio::test(flavor = "current_thread")] +#[cfg(unix)] +async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let approval_policy = AskForApproval::OnRequest; + let sandbox_policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![], + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }; + let sandbox_policy_for_config = sandbox_policy.clone(); + + let mut builder = test_codex() + .with_model("gpt-5.1-codex") + .with_config(move |config| { + config.approval_policy = Constrained::allow_any(approval_policy); + config.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); + }); + let test = builder.build(&server).await?; + + let target = TargetPath::OutsideWorkspace("apply_patch_allow_session.txt"); + let (path, patch_path) = target.resolve_for_patch(&test); + let _ = fs::remove_file(&path); + + let patch_add = build_add_file_patch(&patch_path, "before"); + let patch_update = format!( + "*** Begin Patch\n*** Update File: {patch_path}\n@@\n-before\n+after\n*** End Patch\n" + ); + + let call_id_1 = "apply_patch_allow_session_1"; + let call_id_2 = "apply_patch_allow_session_2"; + + let _ = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_apply_patch_function_call(call_id_1, &patch_add), + ev_completed("resp-1"), + ]), + ) + .await; + let _ = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + submit_turn( 
+ &test, + "apply_patch allow session", + approval_policy, + sandbox_policy.clone(), + ) + .await?; + let _ = expect_patch_approval(&test, call_id_1).await; + test.codex + .submit(Op::PatchApproval { + id: "0".into(), + decision: ReviewDecision::ApprovedForSession, + }) + .await?; + wait_for_completion(&test).await; + assert!(fs::read_to_string(&path)?.contains("before")); + + let _ = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-3"), + ev_apply_patch_function_call(call_id_2, &patch_update), + ev_completed("resp-3"), + ]), + ) + .await; + let _ = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-2", "done"), + ev_completed("resp-4"), + ]), + ) + .await; + + submit_turn( + &test, + "apply_patch allow session followup", + approval_policy, + sandbox_policy.clone(), + ) + .await?; + + let event = wait_for_event(&test.codex, |event| { + matches!( + event, + EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TaskComplete(_) + ) + }) + .await; + match event { + EventMsg::TaskComplete(_) => {} + EventMsg::ApplyPatchApprovalRequest(event) => { + panic!("unexpected patch approval request: {:?}", event.call_id) + } + other => panic!("unexpected event: {other:?}"), + } + + assert!(fs::read_to_string(&path)?.contains("after")); + let _ = fs::remove_file(path); + + Ok(()) +} + #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> { diff --git a/codex-rs/core/tests/suite/cli_stream.rs b/codex-rs/core/tests/suite/cli_stream.rs index 8901dd210e9..633d55a608a 100644 --- a/codex-rs/core/tests/suite/cli_stream.rs +++ b/codex-rs/core/tests/suite/cli_stream.rs @@ -1,6 +1,7 @@ use assert_cmd::Command as AssertCommand; use codex_core::RolloutRecorder; use codex_core::protocol::GitInfo; +use codex_utils_cargo_bin::find_resource; use core_test_support::fs_wait; use core_test_support::skip_if_no_network; use std::time::Duration; @@ -12,6 +13,16 @@ use 
wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; +fn repo_root() -> std::path::PathBuf { + #[expect(clippy::expect_used)] + find_resource!(".").expect("failed to resolve repo root") +} + +fn cli_responses_fixture() -> std::path::PathBuf { + #[expect(clippy::expect_used)] + find_resource!("tests/cli_responses_fixture.sse").expect("failed to resolve fixture path") +} + /// Tests streaming chat completions through the CLI using a mock server. /// This test: /// 1. Sets up a mock server that simulates OpenAI's chat completions API @@ -23,6 +34,7 @@ async fn chat_mode_stream_cli() { skip_if_no_network!(); let server = MockServer::start().await; + let repo_root = repo_root(); let sse = concat!( "data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n", "data: {\"choices\":[{\"delta\":{}}]}\n\n", @@ -53,7 +65,7 @@ async fn chat_mode_stream_cli() { .arg("-c") .arg("model_provider=\"mock\"") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg("hello?"); cmd.env("CODEX_HOME", home.path()) .env("OPENAI_API_KEY", "dummy") @@ -72,7 +84,7 @@ async fn chat_mode_stream_cli() { // Verify a new session rollout was created and is discoverable via list_conversations let provider_filter = vec!["mock".to_string()]; - let page = RolloutRecorder::list_conversations( + let page = RolloutRecorder::list_threads( home.path(), 10, None, @@ -127,6 +139,7 @@ async fn exec_cli_applies_experimental_instructions_file() { ); let home = TempDir::new().unwrap(); + let repo_root = repo_root(); let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap(); let mut cmd = AssertCommand::new(bin); cmd.arg("exec") @@ -140,7 +153,7 @@ async fn exec_cli_applies_experimental_instructions_file() { "experimental_instructions_file=\"{custom_path_str}\"" )) .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg("hello?\n"); cmd.env("CODEX_HOME", home.path()) .env("OPENAI_API_KEY", "dummy") @@ -177,8 +190,8 @@ async fn 
exec_cli_applies_experimental_instructions_file() { async fn responses_api_stream_cli() { skip_if_no_network!(); - let fixture = - std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse"); + let fixture = cli_responses_fixture(); + let repo_root = repo_root(); let home = TempDir::new().unwrap(); let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap(); @@ -186,7 +199,7 @@ async fn responses_api_stream_cli() { cmd.arg("exec") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg("hello?"); cmd.env("CODEX_HOME", home.path()) .env("OPENAI_API_KEY", "dummy") @@ -213,8 +226,8 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> { let prompt = format!("echo {marker}"); // 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic. - let fixture = - std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse"); + let fixture = cli_responses_fixture(); + let repo_root = repo_root(); // 4. Run the codex CLI and invoke `exec`, which is what records a session. 
let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap(); @@ -222,7 +235,7 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> { cmd.arg("exec") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt); cmd.env("CODEX_HOME", home.path()) .env("OPENAI_API_KEY", "dummy") @@ -343,7 +356,7 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> { cmd2.arg("exec") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt2) .arg("resume") .arg("--last"); diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index a22027f99fb..d137d1582fd 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -1,27 +1,26 @@ use codex_core::AuthManager; use codex_core::CodexAuth; use codex_core::ContentItem; -use codex_core::ConversationManager; use codex_core::LocalShellAction; use codex_core::LocalShellExecAction; use codex_core::LocalShellStatus; use codex_core::ModelClient; use codex_core::ModelProviderInfo; -use codex_core::NewConversation; +use codex_core::NewThread; use codex_core::Prompt; use codex_core::ResponseEvent; use codex_core::ResponseItem; +use codex_core::ThreadManager; use codex_core::WireApi; use codex_core::auth::AuthCredentialsStoreMode; use codex_core::built_in_model_providers; use codex_core::error::CodexErr; -use codex_core::features::Feature; use codex_core::models_manager::manager::ModelsManager; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_core::protocol::SessionSource; -use codex_otel::otel_manager::OtelManager; -use codex_protocol::ConversationId; +use codex_otel::OtelManager; +use codex_protocol::ThreadId; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::Verbosity; use codex_protocol::models::ReasoningItemContent; @@ -259,19 +258,19 @@ async fn 
resume_includes_initial_messages_and_sends_prior_items() { // Also configure user instructions to ensure they are NOT delivered on resume. config.user_instructions = Some("be nice".to_string()); - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); let auth_manager = codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); - let NewConversation { - conversation: codex, + let NewThread { + thread: codex, session_configured, .. - } = conversation_manager - .resume_conversation_from_rollout(config, session_path.clone(), auth_manager) + } = thread_manager + .resume_thread_from_rollout(config, session_path.clone(), auth_manager) .await .expect("resume conversation"); @@ -290,6 +289,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -346,17 +346,18 @@ async fn includes_conversation_id_and_model_headers_in_request() { let mut config = load_default_config_for_test(&codex_home).await; config.model_provider = model_provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); - let NewConversation { - conversation: codex, - conversation_id, + let NewThread { + thread: codex, + thread_id: conversation_id, session_configured: _, - } = conversation_manager - .new_conversation(config) + .. 
+ } = thread_manager + .start_thread(config) .await .expect("create new conversation"); @@ -365,6 +366,7 @@ async fn includes_conversation_id_and_model_headers_in_request() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -408,22 +410,23 @@ async fn includes_base_instructions_override_in_request() { config.base_instructions = Some("test instructions".to_string()); config.model_provider = model_provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -469,17 +472,18 @@ async fn chatgpt_auth_sends_correct_request() { let codex_home = TempDir::new().unwrap(); let mut config = load_default_config_for_test(&codex_home).await; config.model_provider = model_provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( create_dummy_codex_auth(), config.model_provider.clone(), config.codex_home.clone(), ); - let NewConversation { - conversation: codex, - conversation_id, + let NewThread { + thread: codex, + thread_id: conversation_id, session_configured: _, - } = conversation_manager - .new_conversation(config) + .. 
+ } = thread_manager + .start_thread(config) .await .expect("create new conversation"); @@ -488,6 +492,7 @@ async fn chatgpt_auth_sends_correct_request() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -568,12 +573,13 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() { Ok(None) => panic!("No CodexAuth found in codex_home"), Err(e) => panic!("Failed to load CodexAuth: {e}"), }; - let conversation_manager = ConversationManager::new(auth_manager, SessionSource::Exec); - let NewConversation { - conversation: codex, - .. - } = conversation_manager - .new_conversation(config) + let thread_manager = ThreadManager::new( + codex_home.path().to_path_buf(), + auth_manager, + SessionSource::Exec, + ); + let NewThread { thread: codex, .. } = thread_manager + .start_thread(config) .await .expect("create new conversation"); @@ -582,6 +588,7 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -606,22 +613,23 @@ async fn includes_user_instructions_message_in_request() { config.model_provider = model_provider; config.user_instructions = Some("be nice".to_string()); - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -674,24 +682,24 @@ async fn skills_append_to_instructions() { let mut config = 
load_default_config_for_test(&codex_home).await; config.model_provider = model_provider; config.cwd = codex_home.path().to_path_buf(); - config.features.enable(Feature::Skills); - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -741,6 +749,7 @@ async fn includes_configured_effort_in_request() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -777,6 +786,7 @@ async fn includes_no_effort_in_request() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -798,7 +808,7 @@ async fn includes_no_effort_in_request() -> anyhow::Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn includes_default_reasoning_effort_in_request_when_defined_by_model_family() +async fn includes_default_reasoning_effort_in_request_when_defined_by_model_info() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = MockServer::start().await; @@ -811,6 +821,7 @@ async fn includes_default_reasoning_effort_in_request_when_defined_by_model_fami items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -849,6 +860,7 @@ async fn configured_reasoning_summary_is_sent() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -887,6 +899,7 
@@ async fn reasoning_summary_is_omitted_when_disabled() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -919,6 +932,7 @@ async fn includes_default_verbosity_in_request() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -958,6 +972,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -996,6 +1011,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1034,22 +1050,23 @@ async fn includes_developer_instructions_message_in_request() { config.user_instructions = Some("be nice".to_string()); config.developer_instructions = Some("be useful".to_string()); - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1127,13 +1144,13 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() { let model = ModelsManager::get_model_offline(config.model.as_deref()); config.model = Some(model.clone()); let config = Arc::new(config); - let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config); - let conversation_id = ConversationId::new(); + let model_info = 
ModelsManager::construct_model_info_offline(model.as_str(), &config); + let conversation_id = ThreadId::new(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let otel_manager = OtelManager::new( conversation_id, model.as_str(), - model_family.slug.as_str(), + model_info.slug.as_str(), None, Some("test@test.com".to_string()), auth_manager.get_auth_mode(), @@ -1145,7 +1162,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() { let client = ModelClient::new( Arc::clone(&config), None, - model_family, + model_info, otel_manager, provider, effort, @@ -1264,22 +1281,23 @@ async fn token_count_includes_rate_limits_snapshot() { let mut config = load_default_config_for_test(&home).await; config.model_provider = provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("test"), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1437,6 +1455,7 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .expect("submission should succeed while emitting usage limit error events"); @@ -1506,6 +1525,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res items: vec![UserInput::Text { text: "seed turn".into(), }], + final_output_json_schema: None, }) .await?; @@ -1516,6 +1536,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res items: vec![UserInput::Text { text: "trigger context 
window".into(), }], + final_output_json_schema: None, }) .await?; @@ -1619,22 +1640,23 @@ async fn azure_overrides_assign_properties_used_for_responses_url() { let mut config = load_default_config_for_test(&codex_home).await; config.model_provider = provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( create_dummy_codex_auth(), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1701,22 +1723,23 @@ async fn env_var_overrides_loaded_auth() { let mut config = load_default_config_for_test(&codex_home).await; config.model_provider = provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( create_dummy_codex_auth(), config.model_provider.clone(), config.codex_home.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create new conversation") - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1783,16 +1806,13 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() { let mut config = load_default_config_for_test(&codex_home).await; config.model_provider = model_provider; - let conversation_manager = ConversationManager::with_models_provider_and_home( + let thread_manager = ThreadManager::with_models_provider_and_home( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), 
config.codex_home.clone(), ); - let NewConversation { - conversation: codex, - .. - } = conversation_manager - .new_conversation(config) + let NewThread { thread: codex, .. } = thread_manager + .start_thread(config) .await .expect("create new conversation"); @@ -1800,6 +1820,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "U1".into() }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1809,6 +1830,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "U2".into() }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1818,6 +1840,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "U3".into() }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index c7556e3388a..e0845ef72b4 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1,8 +1,8 @@ #![allow(clippy::expect_used)] use codex_core::CodexAuth; -use codex_core::ConversationManager; use codex_core::ModelProviderInfo; -use codex_core::NewConversation; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::compact::SUMMARIZATION_PROMPT; use codex_core::compact::SUMMARY_PREFIX; @@ -62,7 +62,7 @@ const DUMMY_CALL_ID: &str = "call-multi-auto"; const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push"; const POST_AUTO_USER_MSG: &str = "post auto follow-up"; -pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. 
Start a new conversation when possible to keep conversations small and targeted."; +pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long threads and multiple compactions can cause the model to be less accurate. Start a new thread when possible to keep threads small and targeted."; fn auto_summary(summary: &str) -> String { summary.to_string() @@ -144,15 +144,15 @@ async fn summarize_context_three_requests_and_instructions() { config.model_provider = model_provider; set_test_compact_prompt(&mut config); config.model_auto_compact_token_limit = Some(200_000); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, + let NewThread { + thread: codex, session_configured, .. - } = conversation_manager.new_conversation(config).await.unwrap(); + } = thread_manager.start_thread(config).await.unwrap(); let rollout_path = session_configured.rollout_path; // 1) Normal user input – should hit server once. 
@@ -161,6 +161,7 @@ async fn summarize_context_three_requests_and_instructions() { items: vec![UserInput::Text { text: "hello world".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -181,6 +182,7 @@ async fn summarize_context_three_requests_and_instructions() { items: vec![UserInput::Text { text: THIRD_USER_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -338,15 +340,15 @@ async fn manual_compact_uses_custom_prompt() { config.model_provider = model_provider; config.compact_prompt = Some(custom_prompt.to_string()); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create conversation") - .conversation; + .thread; codex.submit(Op::Compact).await.expect("trigger compact"); let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await; @@ -418,14 +420,11 @@ async fn manual_compact_emits_api_and_local_token_usage_events() { config.model_provider = model_provider; set_test_compact_prompt(&mut config); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, - .. - } = conversation_manager.new_conversation(config).await.unwrap(); + let NewThread { thread: codex, .. } = thread_manager.start_thread(config).await.unwrap(); // Trigger manual compact and collect TokenCount events for the compact turn. 
codex.submit(Op::Compact).await.unwrap(); @@ -580,6 +579,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { items: vec![UserInput::Text { text: user_message.into(), }], + final_output_json_schema: None, }) .await .expect("submit user input"); @@ -1069,21 +1069,18 @@ async fn auto_compact_runs_after_token_limit_hit() { config.model_provider = model_provider; set_test_compact_prompt(&mut config); config.model_auto_compact_token_limit = Some(200_000); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let codex = conversation_manager - .new_conversation(config) - .await - .unwrap() - .conversation; + let codex = thread_manager.start_thread(config).await.unwrap().thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: FIRST_AUTO_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1095,6 +1092,7 @@ async fn auto_compact_runs_after_token_limit_hit() { items: vec![UserInput::Text { text: SECOND_AUTO_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1106,6 +1104,7 @@ async fn auto_compact_runs_after_token_limit_hit() { items: vec![UserInput::Text { text: POST_AUTO_USER_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1403,21 +1402,22 @@ async fn auto_compact_persists_rollout_entries() { config.model_provider = model_provider; set_test_compact_prompt(&mut config); config.model_auto_compact_token_limit = Some(200_000); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, + let NewThread { + thread: codex, session_configured, .. 
- } = conversation_manager.new_conversation(config).await.unwrap(); + } = thread_manager.start_thread(config).await.unwrap(); codex .submit(Op::UserInput { items: vec![UserInput::Text { text: FIRST_AUTO_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1428,6 +1428,7 @@ async fn auto_compact_persists_rollout_entries() { items: vec![UserInput::Text { text: SECOND_AUTO_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1438,6 +1439,7 @@ async fn auto_compact_persists_rollout_entries() { items: vec![UserInput::Text { text: POST_AUTO_USER_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1515,20 +1517,21 @@ async fn manual_compact_retries_after_context_window_error() { config.model_provider = model_provider; set_test_compact_prompt(&mut config); config.model_auto_compact_token_limit = Some(200_000); - let codex = ConversationManager::with_models_provider( + let codex = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ) - .new_conversation(config) + .start_thread(config) .await .unwrap() - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "first turn".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1541,7 +1544,7 @@ async fn manual_compact_retries_after_context_window_error() { panic!("expected background event after compact retry"); }; assert!( - event.message.contains("Trimmed 1 older conversation item"), + event.message.contains("Trimmed 1 older thread item"), "background event should mention trimmed item count: {}", event.message ); @@ -1647,20 +1650,21 @@ async fn manual_compact_twice_preserves_latest_user_messages() { let mut config = load_default_config_for_test(&home).await; config.model_provider = model_provider; set_test_compact_prompt(&mut config); - let codex = ConversationManager::with_models_provider( + let codex = ThreadManager::with_models_provider( 
CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ) - .new_conversation(config) + .start_thread(config) .await .unwrap() - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: first_user_message.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1674,6 +1678,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() { items: vec![UserInput::Text { text: second_user_message.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1687,6 +1692,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() { items: vec![UserInput::Text { text: final_user_message.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1851,21 +1857,18 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_ config.model_provider = model_provider; set_test_compact_prompt(&mut config); config.model_auto_compact_token_limit = Some(200); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let codex = conversation_manager - .new_conversation(config) - .await - .unwrap() - .conversation; + let codex = thread_manager.start_thread(config).await.unwrap().thread; let mut auto_compact_lifecycle_events = Vec::new(); for user in [MULTI_AUTO_MSG, follow_up_user, final_user] { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: user.into() }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1964,20 +1967,21 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() { config.model_context_window = Some(context_window); config.model_auto_compact_token_limit = Some(limit); - let codex = ConversationManager::with_models_provider( + let codex = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ) - .new_conversation(config) 
+ .start_thread(config) .await .unwrap() - .conversation; + .thread; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: FUNCTION_CALL_LIMIT_MSG.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1989,6 +1993,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() { items: vec![UserInput::Text { text: follow_up_user.into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -2103,6 +2108,7 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: user.into() }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 34e44419b41..5a2fb54530e 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -74,6 +74,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> { items: vec![UserInput::Text { text: "hello remote compact".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -86,6 +87,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> { items: vec![UserInput::Text { text: "after compact".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -191,6 +193,7 @@ async fn remote_compact_runs_automatically() -> Result<()> { items: vec![UserInput::Text { text: "hello remote compact".into(), }], + final_output_json_schema: None, }) .await?; let message = wait_for_event_match(&codex, |ev| match ev { @@ -263,6 +266,7 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> items: vec![UserInput::Text { text: "needs compaction".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, 
EventMsg::TaskComplete(_))).await; diff --git a/codex-rs/core/tests/suite/compact_resume_fork.rs b/codex-rs/core/tests/suite/compact_resume_fork.rs index 75468ae145c..4ad829f07ab 100644 --- a/codex-rs/core/tests/suite/compact_resume_fork.rs +++ b/codex-rs/core/tests/suite/compact_resume_fork.rs @@ -11,10 +11,10 @@ use super::compact::COMPACT_WARNING_MESSAGE; use super::compact::FIRST_REPLY; use super::compact::SUMMARY_TEXT; use codex_core::CodexAuth; -use codex_core::CodexConversation; -use codex_core::ConversationManager; +use codex_core::CodexThread; use codex_core::ModelProviderInfo; -use codex_core::NewConversation; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::compact::SUMMARIZATION_PROMPT; use codex_core::config::Config; @@ -171,7 +171,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "compact+resume test expects resumed path {resumed_path:?} to exist", ); - let forked = fork_conversation(&manager, &config, resumed_path, 2).await; + let forked = fork_thread(&manager, &config, resumed_path, 2).await; user_turn(&forked, "AFTER_FORK").await; // 3. Capture the requests to the model and validate the history slices. 
@@ -623,7 +623,7 @@ async fn compact_resume_after_second_compaction_preserves_history() { "second compact test expects resumed path {resumed_path:?} to exist", ); - let forked = fork_conversation(&manager, &config, resumed_path, 3).await; + let forked = fork_thread(&manager, &config, resumed_path, 3).await; user_turn(&forked, "AFTER_FORK").await; compact_conversation(&forked).await; @@ -855,7 +855,7 @@ async fn mount_second_compact_flow(server: &MockServer) { async fn start_test_conversation( server: &MockServer, model: Option<&str>, -) -> (TempDir, Config, ConversationManager, Arc) { +) -> (TempDir, Config, ThreadManager, Arc) { let model_provider = ModelProviderInfo { name: "Non-OpenAI Model provider".into(), base_url: Some(format!("{}/v1", server.uri())), @@ -868,29 +868,30 @@ async fn start_test_conversation( if let Some(model) = model { config.model = Some(model.to_string()); } - let manager = ConversationManager::with_models_provider( + let manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { conversation, .. } = manager - .new_conversation(config.clone()) + let NewThread { thread, .. 
} = manager + .start_thread(config.clone()) .await .expect("create conversation"); - (home, config, manager, conversation) + (home, config, manager, thread) } -async fn user_turn(conversation: &Arc, text: &str) { +async fn user_turn(conversation: &Arc, text: &str) { conversation .submit(Op::UserInput { items: vec![UserInput::Text { text: text.into() }], + final_output_json_schema: None, }) .await .expect("submit user turn"); wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; } -async fn compact_conversation(conversation: &Arc) { +async fn compact_conversation(conversation: &Arc) { conversation .submit(Op::Compact) .await @@ -903,34 +904,34 @@ async fn compact_conversation(conversation: &Arc) { wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; } -async fn fetch_conversation_path(conversation: &Arc) -> std::path::PathBuf { +async fn fetch_conversation_path(conversation: &Arc) -> std::path::PathBuf { conversation.rollout_path() } async fn resume_conversation( - manager: &ConversationManager, + manager: &ThreadManager, config: &Config, path: std::path::PathBuf, -) -> Arc { +) -> Arc { let auth_manager = codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy")); - let NewConversation { conversation, .. } = manager - .resume_conversation_from_rollout(config.clone(), path, auth_manager) + let NewThread { thread, .. } = manager + .resume_thread_from_rollout(config.clone(), path, auth_manager) .await .expect("resume conversation"); - conversation + thread } #[cfg(test)] -async fn fork_conversation( - manager: &ConversationManager, +async fn fork_thread( + manager: &ThreadManager, config: &Config, path: std::path::PathBuf, nth_user_message: usize, -) -> Arc { - let NewConversation { conversation, .. } = manager - .fork_conversation(nth_user_message, config.clone(), path) +) -> Arc { + let NewThread { thread, .. 
} = manager + .fork_thread(nth_user_message, config.clone(), path) .await .expect("fork conversation"); - conversation + thread } diff --git a/codex-rs/core/tests/suite/exec_policy.rs b/codex-rs/core/tests/suite/exec_policy.rs index 470478ad759..c31df5036f4 100644 --- a/codex-rs/core/tests/suite/exec_policy.rs +++ b/codex-rs/core/tests/suite/exec_policy.rs @@ -97,7 +97,7 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> { assert!( end.aggregated_output - .contains("execpolicy forbids this command"), + .contains("policy forbids commands starting with `echo`"), "unexpected output: {}", end.aggregated_output ); diff --git a/codex-rs/core/tests/suite/fork_conversation.rs b/codex-rs/core/tests/suite/fork_thread.rs similarity index 89% rename from codex-rs/core/tests/suite/fork_conversation.rs rename to codex-rs/core/tests/suite/fork_thread.rs index d302b4d77a2..50a6dba1fbb 100644 --- a/codex-rs/core/tests/suite/fork_conversation.rs +++ b/codex-rs/core/tests/suite/fork_thread.rs @@ -1,7 +1,7 @@ use codex_core::CodexAuth; -use codex_core::ConversationManager; use codex_core::ModelProviderInfo; -use codex_core::NewConversation; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::parse_turn_item; use codex_core::protocol::EventMsg; @@ -26,7 +26,7 @@ fn sse_completed(id: &str) -> String { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn fork_conversation_twice_drops_to_first_message() { +async fn fork_thread_twice_drops_to_first_message() { skip_if_no_network!(); // Start a mock server that completes three turns. 
@@ -55,15 +55,12 @@ async fn fork_conversation_twice_drops_to_first_message() { config.model_provider = model_provider.clone(); let config_for_fork = config.clone(); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, - .. - } = conversation_manager - .new_conversation(config) + let NewThread { thread: codex, .. } = thread_manager + .start_thread(config) .await .expect("create conversation"); @@ -74,6 +71,7 @@ async fn fork_conversation_twice_drops_to_first_message() { items: vec![UserInput::Text { text: text.to_string(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -128,11 +126,11 @@ async fn fork_conversation_twice_drops_to_first_message() { // After dropping again (n=1 on fork1), compute expected relative to fork1's rollout. // Fork once with n=1 → drops the last user input and everything after. - let NewConversation { - conversation: codex_fork1, + let NewThread { + thread: codex_fork1, .. - } = conversation_manager - .fork_conversation(1, config_for_fork.clone(), base_path.clone()) + } = thread_manager + .fork_thread(1, config_for_fork.clone(), base_path.clone()) .await .expect("fork 1"); @@ -146,11 +144,11 @@ async fn fork_conversation_twice_drops_to_first_message() { ); // Fork again with n=0 → drops the (new) last user message, leaving only the first. - let NewConversation { - conversation: codex_fork2, + let NewThread { + thread: codex_fork2, .. 
- } = conversation_manager - .fork_conversation(0, config_for_fork.clone(), fork1_path.clone()) + } = thread_manager + .fork_thread(0, config_for_fork.clone(), fork1_path.clone()) .await .expect("fork 2"); diff --git a/codex-rs/core/tests/suite/items.rs b/codex-rs/core/tests/suite/items.rs index 4ec23e9de66..3c6cf5ff3ef 100644 --- a/codex-rs/core/tests/suite/items.rs +++ b/codex-rs/core/tests/suite/items.rs @@ -43,6 +43,7 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> { items: (vec![UserInput::Text { text: "please inspect sample.txt".into(), }]), + final_output_json_schema: None, }) .await?; @@ -99,6 +100,7 @@ async fn assistant_message_item_is_emitted() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "please summarize results".into(), }], + final_output_json_schema: None, }) .await?; @@ -155,6 +157,7 @@ async fn reasoning_item_is_emitted() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "explain your reasoning".into(), }], + final_output_json_schema: None, }) .await?; @@ -213,6 +216,7 @@ async fn web_search_item_is_emitted() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "find the weather".into(), }], + final_output_json_schema: None, }) .await?; @@ -265,6 +269,7 @@ async fn agent_message_content_delta_has_item_metadata() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "please stream text".into(), }], + final_output_json_schema: None, }) .await?; @@ -330,6 +335,7 @@ async fn reasoning_content_delta_has_item_metadata() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "reason through it".into(), }], + final_output_json_schema: None, }) .await?; @@ -387,6 +393,7 @@ async fn reasoning_raw_content_delta_respects_flag() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "show raw reasoning".into(), }], + final_output_json_schema: None, }) .await?; diff --git a/codex-rs/core/tests/suite/list_models.rs b/codex-rs/core/tests/suite/list_models.rs index 565b978faa2..b81ebcb72df 100644 --- 
a/codex-rs/core/tests/suite/list_models.rs +++ b/codex-rs/core/tests/suite/list_models.rs @@ -1,6 +1,6 @@ use anyhow::Result; use codex_core::CodexAuth; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffort; @@ -13,7 +13,7 @@ use tempfile::tempdir; async fn list_models_returns_api_key_models() -> Result<()> { let codex_home = tempdir()?; let config = load_default_config_for_test(&codex_home).await; - let manager = ConversationManager::with_models_provider( + let manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("sk-test"), built_in_model_providers()["openai"].clone(), ); @@ -29,7 +29,7 @@ async fn list_models_returns_api_key_models() -> Result<()> { async fn list_models_returns_chatgpt_models() -> Result<()> { let codex_home = tempdir()?; let config = load_default_config_for_test(&codex_home).await; - let manager = ConversationManager::with_models_provider( + let manager = ThreadManager::with_models_provider( CodexAuth::create_dummy_chatgpt_auth_for_testing(), built_in_model_providers()["openai"].clone(), ); @@ -42,7 +42,18 @@ async fn list_models_returns_chatgpt_models() -> Result<()> { } fn expected_models_for_api_key() -> Vec { - vec![gpt_5_1_codex_max(), gpt_5_1_codex_mini(), gpt_5_2()] + vec![ + gpt_5_1_codex_max(), + gpt_5_1_codex_mini(), + gpt_5_2(), + bengalfox(), + boomslang(), + gpt_5_codex(), + gpt_5_codex_mini(), + gpt_5_1_codex(), + gpt_5(), + gpt_5_1(), + ] } fn expected_models_for_chatgpt() -> Vec { @@ -53,6 +64,13 @@ fn expected_models_for_chatgpt() -> Vec { gpt_5_1_codex_max, gpt_5_1_codex_mini(), gpt_5_2(), + bengalfox(), + boomslang(), + gpt_5_codex(), + gpt_5_codex_mini(), + gpt_5_1_codex(), + gpt_5(), + gpt_5_1(), ] } @@ -168,7 +186,7 @@ fn gpt_5_2() -> ModelPreset { ), effort( ReasoningEffort::XHigh, - "Extra high reasoning for complex problems", + "Extra high 
reasoning depth for complex problems", ), ], is_default: false, @@ -178,6 +196,210 @@ fn gpt_5_2() -> ModelPreset { } } +fn bengalfox() -> ModelPreset { + ModelPreset { + id: "bengalfox".to_string(), + model: "bengalfox".to_string(), + display_name: "bengalfox".to_string(), + description: "bengalfox".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Low, + "Fast responses with lighter reasoning", + ), + effort( + ReasoningEffort::Medium, + "Balances speed and reasoning depth for everyday tasks", + ), + effort( + ReasoningEffort::High, + "Greater reasoning depth for complex problems", + ), + effort( + ReasoningEffort::XHigh, + "Extra high reasoning depth for complex problems", + ), + ], + is_default: false, + upgrade: None, + show_in_picker: false, + supported_in_api: true, + } +} + +fn boomslang() -> ModelPreset { + ModelPreset { + id: "boomslang".to_string(), + model: "boomslang".to_string(), + display_name: "boomslang".to_string(), + description: "boomslang".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Low, + "Balances speed with some reasoning; useful for straightforward queries and short explanations", + ), + effort( + ReasoningEffort::Medium, + "Provides a solid balance of reasoning depth and latency for general-purpose tasks", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + effort( + ReasoningEffort::XHigh, + "Extra high reasoning depth for complex problems", + ), + ], + is_default: false, + upgrade: None, + show_in_picker: false, + supported_in_api: true, + } +} + +fn gpt_5_codex() -> ModelPreset { + ModelPreset { + id: "gpt-5-codex".to_string(), + model: "gpt-5-codex".to_string(), + display_name: "gpt-5-codex".to_string(), + description: "Optimized for codex.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + 
supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Low, + "Fastest responses with limited reasoning", + ), + effort( + ReasoningEffort::Medium, + "Dynamically adjusts reasoning based on the task", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + ], + is_default: false, + upgrade: Some(gpt52_codex_upgrade()), + show_in_picker: false, + supported_in_api: true, + } +} + +fn gpt_5_codex_mini() -> ModelPreset { + ModelPreset { + id: "gpt-5-codex-mini".to_string(), + model: "gpt-5-codex-mini".to_string(), + display_name: "gpt-5-codex-mini".to_string(), + description: "Optimized for codex. Cheaper, faster, but less capable.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Medium, + "Dynamically adjusts reasoning based on the task", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + ], + is_default: false, + upgrade: Some(gpt52_codex_upgrade()), + show_in_picker: false, + supported_in_api: true, + } +} + +fn gpt_5_1_codex() -> ModelPreset { + ModelPreset { + id: "gpt-5.1-codex".to_string(), + model: "gpt-5.1-codex".to_string(), + display_name: "gpt-5.1-codex".to_string(), + description: "Optimized for codex.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Low, + "Fastest responses with limited reasoning", + ), + effort( + ReasoningEffort::Medium, + "Dynamically adjusts reasoning based on the task", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + ], + is_default: false, + upgrade: Some(gpt52_codex_upgrade()), + show_in_picker: false, + supported_in_api: true, + } +} + +fn gpt_5() -> ModelPreset { + ModelPreset { + id: "gpt-5".to_string(), + model: "gpt-5".to_string(), + display_name: "gpt-5".to_string(), + 
description: "Broad world knowledge with strong general reasoning.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Minimal, + "Fastest responses with little reasoning", + ), + effort( + ReasoningEffort::Low, + "Balances speed with some reasoning; useful for straightforward queries and short explanations", + ), + effort( + ReasoningEffort::Medium, + "Provides a solid balance of reasoning depth and latency for general-purpose tasks", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + ], + is_default: false, + upgrade: Some(gpt52_codex_upgrade()), + show_in_picker: false, + supported_in_api: true, + } +} + +fn gpt_5_1() -> ModelPreset { + ModelPreset { + id: "gpt-5.1".to_string(), + model: "gpt-5.1".to_string(), + display_name: "gpt-5.1".to_string(), + description: "Broad world knowledge with strong general reasoning.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + effort( + ReasoningEffort::Low, + "Balances speed with some reasoning; useful for straightforward queries and short explanations", + ), + effort( + ReasoningEffort::Medium, + "Provides a solid balance of reasoning depth and latency for general-purpose tasks", + ), + effort( + ReasoningEffort::High, + "Maximizes reasoning depth for complex or ambiguous problems", + ), + ], + is_default: false, + upgrade: Some(gpt52_codex_upgrade()), + show_in_picker: false, + supported_in_api: true, + } +} + fn gpt52_codex_upgrade() -> codex_protocol::openai_models::ModelUpgrade { codex_protocol::openai_models::ModelUpgrade { id: "gpt-5.2-codex".to_string(), diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 63784bd4032..effbc8a9316 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -28,13 +28,14 @@ mod compact_resume_fork; mod deprecation_notice; mod 
exec; mod exec_policy; -mod fork_conversation; +mod fork_thread; mod grep_files; mod items; mod json_result; mod list_dir; mod list_models; mod live_cli; +mod model_info_overrides; mod model_overrides; mod model_tools; mod models_etag_responses; @@ -43,6 +44,7 @@ mod prompt_caching; mod quota_exceeded; mod read_file; mod remote_models; +mod request_compression; mod resume; mod resume_warning; mod review; @@ -65,3 +67,4 @@ mod unified_exec; mod user_notification; mod user_shell_cmd; mod view_image; +mod web_search_cached; diff --git a/codex-rs/core/tests/suite/model_info_overrides.rs b/codex-rs/core/tests/suite/model_info_overrides.rs new file mode 100644 index 00000000000..9d9c0feef3f --- /dev/null +++ b/codex-rs/core/tests/suite/model_info_overrides.rs @@ -0,0 +1,32 @@ +use codex_core::models_manager::manager::ModelsManager; +use codex_protocol::openai_models::TruncationPolicyConfig; +use core_test_support::load_default_config_for_test; +use pretty_assertions::assert_eq; +use tempfile::TempDir; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn offline_model_info_without_tool_output_override() { + let codex_home = TempDir::new().expect("create temp dir"); + let config = load_default_config_for_test(&codex_home).await; + + let model_info = ModelsManager::construct_model_info_offline("gpt-5.1", &config); + + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(10_000) + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn offline_model_info_with_tool_output_override() { + let codex_home = TempDir::new().expect("create temp dir"); + let mut config = load_default_config_for_test(&codex_home).await; + config.tool_output_token_limit = Some(123); + + let model_info = ModelsManager::construct_model_info_offline("gpt-5.1-codex", &config); + + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::tokens(123) + ); +} diff --git a/codex-rs/core/tests/suite/model_overrides.rs 
b/codex-rs/core/tests/suite/model_overrides.rs index f7cdac67c16..a418e35a390 100644 --- a/codex-rs/core/tests/suite/model_overrides.rs +++ b/codex-rs/core/tests/suite/model_overrides.rs @@ -1,5 +1,5 @@ use codex_core::CodexAuth; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_protocol::openai_models::ReasoningEffort; @@ -22,15 +22,15 @@ async fn override_turn_context_does_not_persist_when_config_exists() { let mut config = load_default_config_for_test(&codex_home).await; config.model = Some("gpt-4o".to_string()); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create conversation") - .conversation; + .thread; codex .submit(Op::OverrideTurnContext { @@ -64,15 +64,15 @@ async fn override_turn_context_does_not_create_config_file() { let config = load_default_config_for_test(&codex_home).await; - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), ); - let codex = conversation_manager - .new_conversation(config) + let codex = thread_manager + .start_thread(config) .await .expect("create conversation") - .conversation; + .thread; codex .submit(Op::OverrideTurnContext { diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs index e19c41da864..4fa45f01087 100644 --- a/codex-rs/core/tests/suite/otel.rs +++ b/codex-rs/core/tests/suite/otel.rs @@ -46,6 +46,7 @@ async fn responses_api_emits_api_request_event() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await 
.unwrap(); @@ -87,6 +88,7 @@ async fn process_sse_emits_tracing_for_output_item() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -125,6 +127,7 @@ async fn process_sse_emits_failed_event_on_parse_error() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -164,6 +167,7 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed() items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -223,6 +227,7 @@ async fn process_sse_failed_event_records_response_error_message() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -280,6 +285,7 @@ async fn process_sse_failed_event_logs_parse_error() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -324,6 +330,7 @@ async fn process_sse_failed_event_logs_missing_error() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -377,6 +384,7 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -427,6 +435,7 @@ async fn process_sse_emits_completed_telemetry() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -494,6 +503,7 @@ async fn handle_responses_span_records_response_kind_and_tool_name() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -558,6 +568,7 @@ async fn record_responses_sets_span_fields_for_response_events() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -637,6 +648,7 @@ async fn 
handle_response_item_records_tool_result_for_custom_tool_call() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -704,6 +716,7 @@ async fn handle_response_item_records_tool_result_for_function_call() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -781,6 +794,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -842,6 +856,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -946,6 +961,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -994,6 +1010,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() { items: vec![UserInput::Text { text: "approved".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1052,6 +1069,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision() items: vec![UserInput::Text { text: "persist".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1110,6 +1128,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { items: vec![UserInput::Text { text: "retry".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1168,6 +1187,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() { items: vec![UserInput::Text { text: "deny".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -1226,6 +1246,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() items: vec![UserInput::Text { text: "persist".into(), 
}], + final_output_json_schema: None, }) .await .unwrap(); @@ -1285,6 +1306,7 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() { items: vec![UserInput::Text { text: "deny".into(), }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index c21174014d1..bde146ed47c 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -75,7 +75,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { let TestCodex { codex, config, - conversation_manager, + thread_manager, .. } = test_codex() .with_config(|config| { @@ -84,9 +84,9 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { }) .build(&server) .await?; - let base_instructions = conversation_manager + let base_instructions = thread_manager .get_models_manager() - .construct_model_family( + .construct_model_info( config .model .as_deref() @@ -94,14 +94,14 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { &config, ) .await - .base_instructions - .clone(); + .base_instructions; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "hello 1".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -111,6 +111,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello 2".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -130,7 +131,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { base_instructions } else { [ - base_instructions.clone(), + base_instructions, include_str!("../../../apply-patch/apply_patch_tool_instructions.md").to_string(), ] .join("\n") @@ -175,6 +176,7 @@ async fn 
codex_mini_latest_tools() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello 1".into(), }], + final_output_json_schema: None, }) .await?; @@ -184,6 +186,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> { items: vec![UserInput::Text { text: "hello 2".into(), }], + final_output_json_schema: None, }) .await?; @@ -238,6 +241,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests items: vec![UserInput::Text { text: "hello 1".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -247,6 +251,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests items: vec![UserInput::Text { text: "hello 2".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -307,6 +312,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an items: vec![UserInput::Text { text: "hello 1".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -334,6 +340,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an items: vec![UserInput::Text { text: "hello 2".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -412,6 +419,7 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul items: vec![UserInput::Text { text: "first message".into(), }], + final_output_json_schema: None, }) .await?; @@ -504,6 +512,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res items: vec![UserInput::Text { text: "hello 1".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; diff --git 
a/codex-rs/core/tests/suite/quota_exceeded.rs b/codex-rs/core/tests/suite/quota_exceeded.rs index 0156c8d1169..e7ccd538478 100644 --- a/codex-rs/core/tests/suite/quota_exceeded.rs +++ b/codex-rs/core/tests/suite/quota_exceeded.rs @@ -44,6 +44,7 @@ async fn quota_exceeded_emits_single_error_event() -> Result<()> { items: vec![UserInput::Text { text: "quota?".into(), }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 201d61c9e0d..71a3106bc38 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use anyhow::Result; use codex_core::CodexAuth; -use codex_core::CodexConversation; -use codex_core::ConversationManager; +use codex_core::CodexThread; use codex_core::ModelProviderInfo; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::config::Config; use codex_core::features::Feature; @@ -64,7 +64,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { slug: REMOTE_MODEL_SLUG.to_string(), display_name: "Remote Test".to_string(), description: Some("A remote model that requires the test shell".to_string()), - default_reasoning_level: ReasoningEffort::Medium, + default_reasoning_level: Some(ReasoningEffort::Medium), supported_reasoning_levels: vec![ReasoningEffortPreset { effort: ReasoningEffort::Medium, description: ReasoningEffort::Medium.to_string(), @@ -74,14 +74,16 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { supported_in_api: true, priority: 1, upgrade: None, - base_instructions: None, + base_instructions: "base instructions".to_string(), supports_reasoning_summaries: false, support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, truncation_policy: TruncationPolicyConfig::bytes(10_000), supports_parallel_tool_calls: false, - context_window: None, + 
context_window: Some(272_000), + auto_compact_token_limit: None, + effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), }; @@ -103,11 +105,11 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { codex, cwd, config, - conversation_manager, + thread_manager, .. } = harness; - let models_manager = conversation_manager.get_models_manager(); + let models_manager = thread_manager.get_models_manager(); let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG, &config).await; @@ -121,10 +123,10 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { ); assert_eq!(requests[0].url.path(), "/v1/models"); - let family = models_manager - .construct_model_family(REMOTE_MODEL_SLUG, &config) + let model_info = models_manager + .construct_model_info(REMOTE_MODEL_SLUG, &config) .await; - assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec); + assert_eq!(model_info.shell_type, ConfigShellToolType::UnifiedExec); codex .submit(Op::OverrideTurnContext { @@ -184,6 +186,95 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_truncation_policy_without_override_preserves_remote() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let slug = "codex-test-truncation-policy"; + let remote_model = test_remote_model_with_policy( + slug, + ModelVisibility::List, + 1, + TruncationPolicyConfig::bytes(12_000), + ); + mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + }, + ) + .await; + + let harness = build_remote_models_harness(&server, |config| { + config.model = Some("gpt-5.1".to_string()); + }) + .await?; + + let models_manager = harness.thread_manager.get_models_manager(); + 
wait_for_model_available(&models_manager, slug, &harness.config).await; + + let model_info = models_manager + .construct_model_info(slug, &harness.config) + .await; + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(12_000) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_truncation_policy_with_tool_output_override() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let slug = "codex-test-truncation-override"; + let remote_model = test_remote_model_with_policy( + slug, + ModelVisibility::List, + 1, + TruncationPolicyConfig::bytes(10_000), + ); + mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + }, + ) + .await; + + let harness = build_remote_models_harness(&server, |config| { + config.model = Some("gpt-5.1".to_string()); + config.tool_output_token_limit = Some(50); + }) + .await?; + + let models_manager = harness.thread_manager.get_models_manager(); + wait_for_model_available(&models_manager, slug, &harness.config).await; + + let model_info = models_manager + .construct_model_info(slug, &harness.config) + .await; + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(200) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn remote_models_apply_remote_base_instructions() -> Result<()> { skip_if_no_network!(Ok(())); @@ -201,7 +292,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { slug: model.to_string(), display_name: "Parallel Remote".to_string(), description: Some("A remote model with custom instructions".to_string()), - default_reasoning_level: ReasoningEffort::Medium, + default_reasoning_level: Some(ReasoningEffort::Medium), supported_reasoning_levels: vec![ReasoningEffortPreset { effort: ReasoningEffort::Medium, description: 
ReasoningEffort::Medium.to_string(), @@ -211,14 +302,16 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { supported_in_api: true, priority: 1, upgrade: None, - base_instructions: Some(remote_base.to_string()), + base_instructions: remote_base.to_string(), supports_reasoning_summaries: false, support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, truncation_policy: TruncationPolicyConfig::bytes(10_000), supports_parallel_tool_calls: false, - context_window: None, + context_window: Some(272_000), + auto_compact_token_limit: None, + effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), }; mount_models_once( @@ -249,11 +342,11 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { codex, cwd, config, - conversation_manager, + thread_manager, .. } = harness; - let models_manager = conversation_manager.get_models_manager(); + let models_manager = thread_manager.get_models_manager(); wait_for_model_available(&models_manager, model, &config).await; codex @@ -316,6 +409,7 @@ async fn remote_models_preserve_builtin_presets() -> Result<()> { ..built_in_model_providers()["openai"].clone() }; let manager = ModelsManager::with_provider( + codex_home.path().to_path_buf(), codex_core::auth::AuthManager::from_auth_for_testing(auth), provider, ); @@ -373,6 +467,7 @@ async fn remote_models_hide_picker_only_models() -> Result<()> { ..built_in_model_providers()["openai"].clone() }; let manager = ModelsManager::with_provider( + codex_home.path().to_path_buf(), codex_core::auth::AuthManager::from_auth_for_testing(auth), provider, ); @@ -381,12 +476,11 @@ async fn remote_models_hide_picker_only_models() -> Result<()> { assert_eq!(selected, "gpt-5.2-codex"); let available = manager.list_models(&config).await; - assert!( - available - .iter() - .all(|model| model.model != "codex-auto-balanced"), - "hidden models should not appear in the picker list" - ); + let hidden = available + .iter() + 
.find(|model| model.model == "codex-auto-balanced") + .expect("hidden remote model should be listed"); + assert!(!hidden.show_in_picker, "hidden models should remain hidden"); Ok(()) } @@ -412,10 +506,10 @@ async fn wait_for_model_available( } struct RemoteModelsHarness { - codex: Arc, + codex: Arc, cwd: Arc, config: Config, - conversation_manager: Arc, + thread_manager: Arc, } // todo(aibrahim): move this to with_model_provier in test_codex @@ -442,26 +536,39 @@ where mutate_config(&mut config); - let conversation_manager = Arc::new(ConversationManager::with_models_provider(auth, provider)); + let thread_manager = ThreadManager::with_models_provider(auth, provider); + let thread_manager = Arc::new(thread_manager); - let new_conversation = conversation_manager - .new_conversation(config.clone()) - .await?; + let new_conversation = thread_manager.start_thread(config.clone()).await?; Ok(RemoteModelsHarness { - codex: new_conversation.conversation, + codex: new_conversation.thread, cwd, config, - conversation_manager, + thread_manager, }) } fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> ModelInfo { + test_remote_model_with_policy( + slug, + visibility, + priority, + TruncationPolicyConfig::bytes(10_000), + ) +} + +fn test_remote_model_with_policy( + slug: &str, + visibility: ModelVisibility, + priority: i32, + truncation_policy: TruncationPolicyConfig, +) -> ModelInfo { ModelInfo { slug: slug.to_string(), display_name: format!("{slug} display"), description: Some(format!("{slug} description")), - default_reasoning_level: ReasoningEffort::Medium, + default_reasoning_level: Some(ReasoningEffort::Medium), supported_reasoning_levels: vec![ReasoningEffortPreset { effort: ReasoningEffort::Medium, description: ReasoningEffort::Medium.to_string(), @@ -471,14 +578,16 @@ fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> supported_in_api: true, priority, upgrade: None, - base_instructions: None, + base_instructions: 
"base instructions".to_string(), supports_reasoning_summaries: false, support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, - truncation_policy: TruncationPolicyConfig::bytes(10_000), + truncation_policy, supports_parallel_tool_calls: false, - context_window: None, + context_window: Some(272_000), + auto_compact_token_limit: None, + effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), } } diff --git a/codex-rs/core/tests/suite/request_compression.rs b/codex-rs/core/tests/suite/request_compression.rs new file mode 100644 index 00000000000..271f67e1aff --- /dev/null +++ b/codex-rs/core/tests/suite/request_compression.rs @@ -0,0 +1,116 @@ +#![cfg(not(target_os = "windows"))] + +use codex_core::CodexAuth; +use codex_core::features::Feature; +use codex_core::protocol::EventMsg; +use codex_core::protocol::Op; +use codex_protocol::user_input::UserInput; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::get_responses_requests; +use core_test_support::responses::mount_sse_once; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use pretty_assertions::assert_eq; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn request_body_is_zstd_compressed_for_codex_backend_when_enabled() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + mount_sse_once( + &server, + sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + ) + .await; + + let base_url = format!("{}/backend-api/codex/v1", server.uri()); + let mut builder = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_config(move |config| { + config.features.enable(Feature::EnableRequestCompression); + 
config.model_provider.base_url = Some(base_url); + }); + let codex = builder.build(&server).await?.codex; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "compress me".into(), + }], + final_output_json_schema: None, + }) + .await?; + + // Wait until the task completes so the request definitely hit the server. + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let requests = get_responses_requests(&server).await; + assert_eq!(requests.len(), 1); + + let request = &requests[0]; + let content_encoding = request + .headers + .get("content-encoding") + .and_then(|v| v.to_str().ok()); + assert_eq!(content_encoding, Some("zstd")); + + let decompressed = zstd::stream::decode_all(std::io::Cursor::new(request.body.clone()))?; + let json: serde_json::Value = serde_json::from_slice(&decompressed)?; + assert!( + json.get("input").is_some(), + "expected request body to decode as Responses API JSON" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn request_body_is_not_compressed_for_api_key_auth_even_when_enabled() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + mount_sse_once( + &server, + sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + ) + .await; + + let base_url = format!("{}/backend-api/codex/v1", server.uri()); + let mut builder = test_codex().with_config(move |config| { + config.features.enable(Feature::EnableRequestCompression); + config.model_provider.base_url = Some(base_url); + }); + let codex = builder.build(&server).await?.codex; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "do not compress".into(), + }], + final_output_json_schema: None, + }) + .await?; + + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let requests = get_responses_requests(&server).await; + assert_eq!(requests.len(), 1); + + let request = &requests[0]; + assert!( + 
request.headers.get("content-encoding").is_none(), + "did not expect request compression for API-key auth" + ); + + let json: serde_json::Value = serde_json::from_slice(&request.body)?; + assert!( + json.get("input").is_some(), + "expected request body to be plain Responses API JSON" + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/resume.rs b/codex-rs/core/tests/suite/resume.rs index bda27533cd7..1fee3858e2f 100644 --- a/codex-rs/core/tests/suite/resume.rs +++ b/codex-rs/core/tests/suite/resume.rs @@ -37,6 +37,7 @@ async fn resume_includes_initial_messages_from_rollout_events() -> Result<()> { items: vec![UserInput::Text { text: "Record some messages".into(), }], + final_output_json_schema: None, }) .await?; @@ -89,6 +90,7 @@ async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()> items: vec![UserInput::Text { text: "Record reasoning messages".into(), }], + final_output_json_schema: None, }) .await?; diff --git a/codex-rs/core/tests/suite/resume_warning.rs b/codex-rs/core/tests/suite/resume_warning.rs index 92acf7de0f0..5b38ce4b8d7 100644 --- a/codex-rs/core/tests/suite/resume_warning.rs +++ b/codex-rs/core/tests/suite/resume_warning.rs @@ -2,15 +2,15 @@ use codex_core::AuthManager; use codex_core::CodexAuth; -use codex_core::ConversationManager; -use codex_core::NewConversation; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::protocol::EventMsg; use codex_core::protocol::InitialHistory; use codex_core::protocol::ResumedHistory; use codex_core::protocol::RolloutItem; use codex_core::protocol::TurnContextItem; use codex_core::protocol::WarningEvent; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use core::time::Duration; use core_test_support::load_default_config_for_test; use core_test_support::wait_for_event; @@ -36,7 +36,7 @@ fn resume_history( }; InitialHistory::Resumed(ResumedHistory { - conversation_id: ConversationId::default(), + conversation_id: ThreadId::default(), history: 
vec![RolloutItem::TurnContext(turn_ctx)], rollout_path: rollout_path.to_path_buf(), }) @@ -56,15 +56,18 @@ async fn emits_warning_when_resumed_model_differs() { let initial_history = resume_history(&config, "previous-model", &rollout_path); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("test"), config.model_provider.clone(), ); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")); // Act: resume the conversation. - let NewConversation { conversation, .. } = conversation_manager - .resume_conversation_with_history(config, initial_history, auth_manager) + let NewThread { + thread: conversation, + .. + } = thread_manager + .resume_thread_with_history(config, initial_history, auth_manager) .await .expect("resume conversation"); diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index b88abe7ac75..763b6109df4 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -1,10 +1,10 @@ use codex_core::CodexAuth; -use codex_core::CodexConversation; +use codex_core::CodexThread; use codex_core::ContentItem; -use codex_core::ConversationManager; use codex_core::ModelProviderInfo; use codex_core::REVIEW_PROMPT; use codex_core::ResponseItem; +use codex_core::ThreadManager; use codex_core::built_in_model_providers; use codex_core::config::Config; use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG; @@ -665,6 +665,7 @@ async fn review_history_surfaces_in_parent_session() { items: vec![UserInput::Text { text: followup.clone(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -831,7 +832,7 @@ async fn new_conversation_for_server( server: &MockServer, codex_home: &TempDir, mutator: F, -) -> Arc +) -> Arc where F: FnOnce(&mut Config), { @@ -842,15 +843,15 @@ where let mut config = load_default_config_for_test(codex_home).await; config.model_provider = 
model_provider; mutator(&mut config); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), ); - conversation_manager - .new_conversation(config) + thread_manager + .start_thread(config) .await .expect("create conversation") - .conversation + .thread } /// Create a conversation resuming from a rollout file, configured to talk to the provided mock server. @@ -860,7 +861,7 @@ async fn resume_conversation_for_server( codex_home: &TempDir, resume_path: std::path::PathBuf, mutator: F, -) -> Arc +) -> Arc where F: FnOnce(&mut Config), { @@ -871,15 +872,15 @@ where let mut config = load_default_config_for_test(codex_home).await; config.model_provider = model_provider; mutator(&mut config); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), ); let auth_manager = codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); - conversation_manager - .resume_conversation_from_rollout(config, resume_path, auth_manager) + thread_manager + .resume_thread_from_rollout(config, resume_path, auth_manager) .await .expect("resume conversation") - .conversation + .thread } diff --git a/codex-rs/core/tests/suite/rollout_list_find.rs b/codex-rs/core/tests/suite/rollout_list_find.rs index 1d40718d4b2..518f26c5625 100644 --- a/codex-rs/core/tests/suite/rollout_list_find.rs +++ b/codex-rs/core/tests/suite/rollout_list_find.rs @@ -3,7 +3,7 @@ use std::io::Write; use std::path::Path; use std::path::PathBuf; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; use tempfile::TempDir; use uuid::Uuid; @@ -44,7 +44,7 @@ async fn find_locates_rollout_file_by_id() { let id = Uuid::new_v4(); let expected = 
write_minimal_rollout_with_id(home.path(), id); - let found = find_conversation_path_by_id_str(home.path(), &id.to_string()) + let found = find_thread_path_by_id_str(home.path(), &id.to_string()) .await .unwrap(); @@ -60,7 +60,7 @@ async fn find_handles_gitignore_covering_codex_home_directory() { let id = Uuid::new_v4(); let expected = write_minimal_rollout_with_id(&codex_home, id); - let found = find_conversation_path_by_id_str(&codex_home, &id.to_string()) + let found = find_thread_path_by_id_str(&codex_home, &id.to_string()) .await .unwrap(); @@ -74,7 +74,7 @@ async fn find_ignores_granular_gitignore_rules() { let expected = write_minimal_rollout_with_id(home.path(), id); std::fs::write(home.path().join("sessions/.gitignore"), "*.jsonl\n").unwrap(); - let found = find_conversation_path_by_id_str(home.path(), &id.to_string()) + let found = find_thread_path_by_id_str(home.path(), &id.to_string()) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/skills.rs b/codex-rs/core/tests/suite/skills.rs index e64b5db3e79..8e9266ee868 100644 --- a/codex-rs/core/tests/suite/skills.rs +++ b/codex-rs/core/tests/suite/skills.rs @@ -2,7 +2,6 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] use anyhow::Result; -use codex_core::features::Feature; use codex_core::protocol::AskForApproval; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; @@ -41,13 +40,9 @@ async fn user_turn_includes_skill_instructions() -> Result<()> { let server = start_mock_server().await; let skill_body = "skill body"; - let mut builder = test_codex() - .with_config(|config| { - config.features.enable(Feature::Skills); - }) - .with_pre_build_hook(|home| { - write_skill(home, "demo", "demo skill", skill_body); - }); + let mut builder = test_codex().with_pre_build_hook(|home| { + write_skill(home, "demo", "demo skill", skill_body); + }); let test = builder.build(&server).await?; let skill_path = test.codex_home_path().join("skills/demo/SKILL.md"); @@ -111,15 +106,11 @@ async fn 
skill_load_errors_surface_in_session_configured() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex() - .with_config(|config| { - config.features.enable(Feature::Skills); - }) - .with_pre_build_hook(|home| { - let skill_dir = home.join("skills").join("broken"); - fs::create_dir_all(&skill_dir).unwrap(); - fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap(); - }); + let mut builder = test_codex().with_pre_build_hook(|home| { + let skill_dir = home.join("skills").join("broken"); + fs::create_dir_all(&skill_dir).unwrap(); + fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap(); + }); let test = builder.build(&server).await?; test.codex @@ -169,17 +160,13 @@ async fn list_skills_includes_system_cache_entries() -> Result<()> { const SYSTEM_SKILL_NAME: &str = "skill-creator"; let server = start_mock_server().await; - let mut builder = test_codex() - .with_config(|config| { - config.features.enable(Feature::Skills); - }) - .with_pre_build_hook(|home| { - let system_skill_path = system_skill_md_path(home, SYSTEM_SKILL_NAME); - assert!( - !system_skill_path.exists(), - "expected embedded system skills not yet installed, but {system_skill_path:?} exists" - ); - }); + let mut builder = test_codex().with_pre_build_hook(|home| { + let system_skill_path = system_skill_md_path(home, SYSTEM_SKILL_NAME); + assert!( + !system_skill_path.exists(), + "expected embedded system skills not yet installed, but {system_skill_path:?} exists" + ); + }); let test = builder.build(&server).await?; let system_skill_path = system_skill_md_path(test.codex_home_path(), SYSTEM_SKILL_NAME); diff --git a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs index e6f8aa95605..e7a60912643 100644 --- a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs +++ b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs @@ -89,6 +89,7 @@ async fn 
continue_after_stream_error() { items: vec![UserInput::Text { text: "first message".into(), }], + final_output_json_schema: None, }) .await .unwrap(); @@ -106,6 +107,7 @@ async fn continue_after_stream_error() { items: vec![UserInput::Text { text: "follow up".into(), }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/stream_no_completed.rs b/codex-rs/core/tests/suite/stream_no_completed.rs index 17a19e7c343..a203658c2fd 100644 --- a/codex-rs/core/tests/suite/stream_no_completed.rs +++ b/codex-rs/core/tests/suite/stream_no_completed.rs @@ -96,6 +96,7 @@ async fn retries_on_early_close() { items: vec![UserInput::Text { text: "hello".into(), }], + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/core/tests/suite/tool_parallelism.rs b/codex-rs/core/tests/suite/tool_parallelism.rs index 5a5b4432095..71a6031fe83 100644 --- a/codex-rs/core/tests/suite/tool_parallelism.rs +++ b/codex-rs/core/tests/suite/tool_parallelism.rs @@ -301,8 +301,11 @@ async fn shell_tools_start_before_response_completed_when_stream_delayed() -> an "perl -MTime::HiRes -e 'print int(Time::HiRes::time()*1000), \"\\n\"' >> \"{}\"", output_path.display() ); + // Use a non-login shell to avoid slow, user-specific shell init (e.g. zsh profiles) + // from making this timing-based test flaky. 
let args = json!({ "command": command, + "login": false, "timeout_ms": 1_000, }); diff --git a/codex-rs/core/tests/suite/undo.rs b/codex-rs/core/tests/suite/undo.rs index 9fca272821c..61bc3b4fdfe 100644 --- a/codex-rs/core/tests/suite/undo.rs +++ b/codex-rs/core/tests/suite/undo.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use anyhow::Context; use anyhow::Result; use anyhow::bail; -use codex_core::CodexConversation; +use codex_core::CodexThread; use codex_core::features::Feature; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; @@ -108,7 +108,7 @@ async fn run_apply_patch_turn( harness.submit(prompt).await } -async fn invoke_undo(codex: &Arc) -> Result { +async fn invoke_undo(codex: &Arc) -> Result { codex.submit(Op::Undo).await?; let event = wait_for_event_match(codex, |msg| match msg { EventMsg::UndoCompleted(done) => Some(done.clone()), @@ -118,7 +118,7 @@ async fn invoke_undo(codex: &Arc) -> Result) -> Result { +async fn expect_successful_undo(codex: &Arc) -> Result { let event = invoke_undo(codex).await?; assert!( event.success, @@ -128,7 +128,7 @@ async fn expect_successful_undo(codex: &Arc) -> Result) -> Result { +async fn expect_failed_undo(codex: &Arc) -> Result { let event = invoke_undo(codex).await?; assert!( !event.success, diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 2ca62a602f0..66319e21c20 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1689,7 +1689,7 @@ async fn unified_exec_closes_long_running_session_at_turn_end() -> Result<()> { codex .submit(Op::UserTurn { items: vec![UserInput::Text { - text: "close unified exec sessions on turn end".into(), + text: "close unified exec processes on turn end".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), @@ -1710,7 +1710,7 @@ async fn unified_exec_closes_long_running_session_at_turn_end() -> Result<()> { let begin_process_id = begin_event .process_id 
.clone() - .expect("expected process_id for long-running unified exec session"); + .expect("expected process_id for long-running unified exec process"); let pid = wait_for_pid_file(&pid_path).await?; assert!( @@ -2560,7 +2560,7 @@ async fn unified_exec_prunes_exited_sessions_first() -> Result<()> { let prune_start = requests .iter() .find_map(|req| req.function_call_output_text(prune_call_id)) - .expect("missing initial prune session output"); + .expect("missing initial prune process output"); let prune_start_output = parse_unified_exec_output(&prune_start)?; assert!(prune_start_output.process_id.is_some()); assert!(prune_start_output.exit_code.is_none()); @@ -2573,7 +2573,7 @@ async fn unified_exec_prunes_exited_sessions_first() -> Result<()> { assert!(keep_write_output.process_id.is_some()); assert!( keep_write_output.output.contains("still alive"), - "expected cat session to echo input, got {:?}", + "expected cat process to echo input, got {:?}", keep_write_output.output ); @@ -2582,7 +2582,7 @@ async fn unified_exec_prunes_exited_sessions_first() -> Result<()> { .find_map(|req| req.function_call_output_text(probe_call_id)) .expect("missing probe output"); assert!( - pruned_probe.contains("UnknownSessionId") || pruned_probe.contains("Unknown process id"), + pruned_probe.contains("UnknownProcessId") || pruned_probe.contains("Unknown process id"), "expected probe to fail after pruning, got {pruned_probe:?}" ); diff --git a/codex-rs/core/tests/suite/user_notification.rs b/codex-rs/core/tests/suite/user_notification.rs index 0d1e941d28a..a3289b87f71 100644 --- a/codex-rs/core/tests/suite/user_notification.rs +++ b/codex-rs/core/tests/suite/user_notification.rs @@ -61,6 +61,7 @@ echo -n "${@: -1}" > $(dirname "${0}")/notify.txt"#, items: vec![UserInput::Text { text: "hello world".into(), }], + final_output_json_schema: None, }) .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs 
b/codex-rs/core/tests/suite/user_shell_cmd.rs index 270cb804870..d6818c0c54d 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -1,6 +1,6 @@ use anyhow::Context; -use codex_core::ConversationManager; -use codex_core::NewConversation; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::protocol::EventMsg; use codex_core::protocol::ExecCommandEndEvent; use codex_core::protocol::ExecCommandSource; @@ -42,15 +42,12 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() { let mut config = load_default_config_for_test(&codex_home).await; config.cwd = cwd.path().to_path_buf(); - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( codex_core::CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, - .. - } = conversation_manager - .new_conversation(config) + let NewThread { thread: codex, .. } = thread_manager + .start_thread(config) .await .expect("create new conversation"); @@ -101,15 +98,12 @@ async fn user_shell_cmd_can_be_interrupted() { // Set up isolated config and conversation. let codex_home = TempDir::new().unwrap(); let config = load_default_config_for_test(&codex_home).await; - let conversation_manager = ConversationManager::with_models_provider( + let thread_manager = ThreadManager::with_models_provider( codex_core::CodexAuth::from_api_key("dummy"), config.model_provider.clone(), ); - let NewConversation { - conversation: codex, - .. - } = conversation_manager - .new_conversation(config) + let NewThread { thread: codex, .. 
} = thread_manager + .start_thread(config) .await .expect("create new conversation"); diff --git a/codex-rs/core/tests/suite/web_search_cached.rs b/codex-rs/core/tests/suite/web_search_cached.rs new file mode 100644 index 00000000000..fa8e303d809 --- /dev/null +++ b/codex-rs/core/tests/suite/web_search_cached.rs @@ -0,0 +1,87 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::features::Feature; +use core_test_support::load_sse_fixture_with_id; +use core_test_support::responses; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use pretty_assertions::assert_eq; +use serde_json::Value; + +fn sse_completed(id: &str) -> String { + load_sse_fixture_with_id("tests/fixtures/completed_template.json", id) +} + +#[allow(clippy::expect_used)] +fn find_web_search_tool(body: &Value) -> &Value { + body["tools"] + .as_array() + .expect("request body should include tools array") + .iter() + .find(|tool| tool.get("type").and_then(Value::as_str) == Some("web_search")) + .expect("tools should include a web_search tool") +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn web_search_cached_sets_external_web_access_false_in_request_body() { + skip_if_no_network!(); + + let server = start_mock_server().await; + let sse = sse_completed("resp-1"); + let resp_mock = responses::mount_sse_once(&server, sse).await; + + let mut builder = test_codex() + .with_model("gpt-5-codex") + .with_config(|config| { + config.features.enable(Feature::WebSearchCached); + }); + let test = builder + .build(&server) + .await + .expect("create test Codex conversation"); + + test.submit_turn("hello cached web search") + .await + .expect("submit turn"); + + let body = resp_mock.single_request().body_json(); + let tool = find_web_search_tool(&body); + assert_eq!( + tool.get("external_web_access").and_then(Value::as_bool), + Some(false), + "web_search_cached should force 
external_web_access=false" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn web_search_cached_takes_precedence_over_web_search_request_in_request_body() { + skip_if_no_network!(); + + let server = start_mock_server().await; + let sse = sse_completed("resp-1"); + let resp_mock = responses::mount_sse_once(&server, sse).await; + + let mut builder = test_codex() + .with_model("gpt-5-codex") + .with_config(|config| { + config.features.enable(Feature::WebSearchRequest); + config.features.enable(Feature::WebSearchCached); + }); + let test = builder + .build(&server) + .await + .expect("create test Codex conversation"); + + test.submit_turn("hello cached+live flags") + .await + .expect("submit turn"); + + let body = resp_mock.single_request().body_json(); + let tool = find_web_search_tool(&body); + assert_eq!( + tool.get("external_web_access").and_then(Value::as_bool), + Some(false), + "web_search_cached should win over web_search_request" + ); +} diff --git a/codex-rs/deny.toml b/codex-rs/deny.toml index f9d411f841b..0a4a08bd89d 100644 --- a/codex-rs/deny.toml +++ b/codex-rs/deny.toml @@ -73,6 +73,8 @@ ignore = [ { id = "RUSTSEC-2024-0388", reason = "derivative is unmaintained; pulled in via starlark v0.13.0 used by execpolicy/cli/core; no fixed release yet" }, { id = "RUSTSEC-2025-0057", reason = "fxhash is unmaintained; pulled in via starlark_map/starlark v0.13.0 used by execpolicy/cli/core; no fixed release yet" }, { id = "RUSTSEC-2024-0436", reason = "paste is unmaintained; pulled in via ratatui/rmcp/starlark used by tui/execpolicy; no fixed release yet" }, + # TODO(joshka, nornagon): remove this exception once we update the ratatui fork to a version that uses lru 0.13+. + { id = "RUSTSEC-2026-0002", reason = "lru 0.12.5 is pulled in via ratatui fork; cannot upgrade until the fork is updated" }, ] # If this is true, then cargo deny will use the git executable to fetch advisory database.
# If this is false, then it uses a built-in git library. diff --git a/codex-rs/docs/codex_mcp_interface.md b/codex-rs/docs/codex_mcp_interface.md index 124e2f91dc9..edd5ac1b2d7 100644 --- a/codex-rs/docs/codex_mcp_interface.md +++ b/codex-rs/docs/codex_mcp_interface.md @@ -70,7 +70,9 @@ Response: `{ conversationId, model, reasoningEffort?, rolloutPath }` Send input to the active turn: - `sendUserMessage` → enqueue items to the conversation -- `sendUserTurn` → structured turn with explicit `cwd`, `approvalPolicy`, `sandboxPolicy`, `model`, optional `effort`, and `summary` +- `sendUserTurn` → structured turn with explicit `cwd`, `approvalPolicy`, `sandboxPolicy`, `model`, optional `effort`, `summary`, and optional `outputSchema` (JSON Schema for the final assistant message) + +For v2 threads, `turn/start` also accepts `outputSchema` to constrain the final assistant message for that turn. Interrupt a running turn: `interruptConversation`. diff --git a/codex-rs/exec-server/tests/common/lib.rs b/codex-rs/exec-server/tests/common/lib.rs index a5a6a3cac58..562d3504f6e 100644 --- a/codex-rs/exec-server/tests/common/lib.rs +++ b/codex-rs/exec-server/tests/common/lib.rs @@ -1,6 +1,7 @@ use codex_core::MCP_SANDBOX_STATE_METHOD; use codex_core::SandboxState; use codex_core::protocol::SandboxPolicy; +use codex_utils_cargo_bin::find_resource; use rmcp::ClientHandler; use rmcp::ErrorData as McpError; use rmcp::RoleClient; @@ -34,16 +35,10 @@ where { let mcp_executable = codex_utils_cargo_bin::cargo_bin("codex-exec-mcp-server")?; let execve_wrapper = codex_utils_cargo_bin::cargo_bin("codex-execve-wrapper")?; - // `bash` requires a special lookup when running under Buck because it is a - // _resource_ rather than a binary target. - let bash = if let Some(root) = codex_utils_cargo_bin::buck_project_root()? 
{ - root.join("codex-rs/exec-server/tests/suite/bash") - } else { - Path::new(env!("CARGO_MANIFEST_DIR")) - .join("..") - .join("suite") - .join("bash") - }; + + // `bash` is a test resource rather than a binary target, so we must use + // `find_resource!` to locate it instead of `cargo_bin()`. + let bash = find_resource!("../suite/bash")?; // Need to ensure the artifact associated with the bash DotSlash file is // available before it is run in a read-only sandbox. diff --git a/codex-rs/exec/src/cli.rs b/codex-rs/exec/src/cli.rs index 392ebb0cd6d..8cff14f929a 100644 --- a/codex-rs/exec/src/cli.rs +++ b/codex-rs/exec/src/cli.rs @@ -11,11 +11,17 @@ pub struct Cli { pub command: Option, /// Optional image(s) to attach to the initial prompt. - #[arg(long = "image", short = 'i', value_name = "FILE", value_delimiter = ',', num_args = 1..)] + #[arg( + long = "image", + short = 'i', + value_name = "FILE", + value_delimiter = ',', + num_args = 1.. + )] pub images: Vec, /// Model the agent should use. - #[arg(long, short = 'm')] + #[arg(long, short = 'm', global = true)] pub model: Option, /// Use open-source provider. @@ -37,7 +43,7 @@ pub struct Cli { pub config_profile: Option, /// Convenience alias for low-friction sandboxed automatic execution (-a on-request, --sandbox workspace-write). - #[arg(long = "full-auto", default_value_t = false)] + #[arg(long = "full-auto", default_value_t = false, global = true)] pub full_auto: bool, /// Skip all confirmation prompts and execute commands without sandboxing. @@ -46,6 +52,7 @@ pub struct Cli { long = "dangerously-bypass-approvals-and-sandbox", alias = "yolo", default_value_t = false, + global = true, conflicts_with = "full_auto" )] pub dangerously_bypass_approvals_and_sandbox: bool, @@ -55,7 +62,7 @@ pub struct Cli { pub cwd: Option, /// Allow running Codex outside a Git repository. 
- #[arg(long = "skip-git-repo-check", default_value_t = false)] + #[arg(long = "skip-git-repo-check", global = true, default_value_t = false)] pub skip_git_repo_check: bool, /// Additional directories that should be writable alongside the primary workspace. @@ -74,7 +81,12 @@ pub struct Cli { pub color: Color, /// Print events to stdout as JSONL. - #[arg(long = "json", alias = "experimental-json", default_value_t = false)] + #[arg( + long = "json", + alias = "experimental-json", + default_value_t = false, + global = true + )] pub json: bool, /// Specifies file where the last message from the agent should be written. @@ -107,6 +119,16 @@ pub struct ResumeArgs { #[arg(long = "last", default_value_t = false)] pub last: bool, + /// Optional image(s) to attach to the prompt sent after resuming. + #[arg( + long = "image", + short = 'i', + value_name = "FILE", + value_delimiter = ',', + num_args = 1 + )] + pub images: Vec, + /// Prompt to send after resuming the session. If `-` is used, read from stdin. 
#[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)] pub prompt: Option, diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 40afab7c9c6..ba6d99c5a82 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -595,7 +595,8 @@ impl EventProcessor for EventProcessorWithHumanOutput { | EventMsg::ReasoningRawContentDelta(_) | EventMsg::SkillsUpdateAvailable | EventMsg::UndoCompleted(_) - | EventMsg::UndoStarted(_) => {} + | EventMsg::UndoStarted(_) + | EventMsg::ThreadRolledBack(_) => {} } CodexStatus::Running } diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 93a481b630e..89be1ac6c69 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -16,10 +16,10 @@ pub use cli::ReviewArgs; use codex_common::oss::ensure_oss_provider_ready; use codex_common::oss::get_default_model_for_oss_provider; use codex_core::AuthManager; -use codex_core::ConversationManager; use codex_core::LMSTUDIO_OSS_PROVIDER_ID; -use codex_core::NewConversation; +use codex_core::NewThread; use codex_core::OLLAMA_OSS_PROVIDER_ID; +use codex_core::ThreadManager; use codex_core::auth::enforce_login_restrictions; use codex_core::config::Config; use codex_core::config::ConfigOverrides; @@ -55,7 +55,7 @@ use crate::cli::Command as ExecCommand; use crate::event_processor::CodexStatus; use crate::event_processor::EventProcessor; use codex_core::default_client::set_default_originator; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; enum InitialOperation { UserTurn { @@ -286,33 +286,33 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any true, config.cli_auth_credentials_store_mode, ); - let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec); - let default_model = conversation_manager + let thread_manager 
= ThreadManager::new( + config.codex_home.clone(), + auth_manager.clone(), + SessionSource::Exec, + ); + let default_model = thread_manager .get_models_manager() .get_model(&config.model, &config) .await; // Handle resume subcommand by resolving a rollout path and using explicit resume API. - let NewConversation { - conversation_id: _, - conversation, + let NewThread { + thread_id: _, + thread, session_configured, } = if let Some(ExecCommand::Resume(args)) = command.as_ref() { let resume_path = resolve_resume_path(&config, args).await?; if let Some(path) = resume_path { - conversation_manager - .resume_conversation_from_rollout(config.clone(), path, auth_manager.clone()) + thread_manager + .resume_thread_from_rollout(config.clone(), path, auth_manager.clone()) .await? } else { - conversation_manager - .new_conversation(config.clone()) - .await? + thread_manager.start_thread(config.clone()).await? } } else { - conversation_manager - .new_conversation(config.clone()) - .await? + thread_manager.start_thread(config.clone()).await? }; let (initial_operation, prompt_summary) = match (command, prompt, images) { (Some(ExecCommand::Review(review_cli)), _, _) => { @@ -335,6 +335,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any let prompt_text = resolve_prompt(prompt_arg); let mut items: Vec = imgs .into_iter() + .chain(args.images.into_iter()) .map(|path| UserInput::LocalImage { path }) .collect(); items.push(UserInput::Text { @@ -377,20 +378,20 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::(); { - let conversation = conversation.clone(); + let thread = thread.clone(); tokio::spawn(async move { loop { tokio::select! { _ = tokio::signal::ctrl_c() => { tracing::debug!("Keyboard interrupt"); // Immediately notify Codex to abort any in‑flight task. 
- conversation.submit(Op::Interrupt).await.ok(); + thread.submit(Op::Interrupt).await.ok(); // Exit the inner loop and return to the main input prompt. The codex // will emit a `TurnInterrupted` (Error) event which is drained later. break; } - res = conversation.next_event() => match res { + res = thread.next_event() => match res { Ok(event) => { debug!("Received event: {event:?}"); @@ -419,7 +420,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any items, output_schema, } => { - let task_id = conversation + let task_id = thread .submit(Op::UserTurn { items, cwd: default_cwd, @@ -435,7 +436,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any task_id } InitialOperation::Review { review_request } => { - let task_id = conversation.submit(Op::Review { review_request }).await?; + let task_id = thread.submit(Op::Review { review_request }).await?; info!("Sent review request with event ID: {task_id}"); task_id } @@ -448,7 +449,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any while let Some(event) = rx.recv().await { if let EventMsg::ElicitationRequest(ev) = &event.msg { // Automatically cancel elicitation requests in exec mode. 
- conversation + thread .submit(Op::ResolveElicitation { server_name: ev.server_name.clone(), request_id: ev.id.clone(), @@ -463,7 +464,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any match shutdown { CodexStatus::Running => continue, CodexStatus::InitiateShutdown => { - conversation.submit(Op::Shutdown).await?; + thread.submit(Op::Shutdown).await?; } CodexStatus::Shutdown => { break; @@ -484,7 +485,7 @@ async fn resolve_resume_path( ) -> anyhow::Result> { if args.last { let default_provider_filter = vec![config.model_provider_id.clone()]; - match codex_core::RolloutRecorder::list_conversations( + match codex_core::RolloutRecorder::list_threads( &config.codex_home, 1, None, @@ -496,12 +497,12 @@ async fn resolve_resume_path( { Ok(page) => Ok(page.items.first().map(|it| it.path.clone())), Err(e) => { - error!("Error listing conversations: {e}"); + error!("Error listing threads: {e}"); Ok(None) } } } else if let Some(id_str) = args.session_id.as_deref() { - let path = find_conversation_path_by_id_str(&config.codex_home, id_str).await?; + let path = find_thread_path_by_id_str(&config.codex_home, id_str).await?; Ok(path) } else { Ok(None) diff --git a/codex-rs/exec/tests/event_processor_with_json_output.rs b/codex-rs/exec/tests/event_processor_with_json_output.rs index d288f568e8b..a3e23181655 100644 --- a/codex-rs/exec/tests/event_processor_with_json_output.rs +++ b/codex-rs/exec/tests/event_processor_with_json_output.rs @@ -69,8 +69,7 @@ fn event(id: &str, msg: EventMsg) -> Event { fn session_configured_produces_thread_started_event() { let mut ep = EventProcessorWithJsonOutput::new(None); let session_id = - codex_protocol::ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8") - .unwrap(); + codex_protocol::ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(); let rollout_path = PathBuf::from("/tmp/rollout.json"); let ev = event( "e1", diff --git a/codex-rs/exec/tests/suite/auth_env.rs 
b/codex-rs/exec/tests/suite/auth_env.rs index 91d7bad8f8f..4f8018e808f 100644 --- a/codex-rs/exec/tests/suite/auth_env.rs +++ b/codex-rs/exec/tests/suite/auth_env.rs @@ -1,4 +1,5 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] +use codex_utils_cargo_bin::find_resource; use core_test_support::responses::ev_completed; use core_test_support::responses::mount_sse_once_match; use core_test_support::responses::sse; @@ -10,6 +11,7 @@ use wiremock::matchers::header; async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> { let test = test_codex_exec(); let server = start_mock_server().await; + let repo_root = find_resource!(".")?; mount_sse_once_match( &server, @@ -21,7 +23,7 @@ async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> { test.cmd_with_server(&server) .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg("echo testing codex api key") .assert() .success(); diff --git a/codex-rs/exec/tests/suite/resume.rs b/codex-rs/exec/tests/suite/resume.rs index e37b38606a2..32913983e61 100644 --- a/codex-rs/exec/tests/suite/resume.rs +++ b/codex-rs/exec/tests/suite/resume.rs @@ -1,8 +1,9 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] use anyhow::Context; +use codex_utils_cargo_bin::find_resource; use core_test_support::test_codex_exec::test_codex_exec; +use pretty_assertions::assert_eq; use serde_json::Value; -use std::path::Path; use std::string::ToString; use uuid::Uuid; use walkdir::WalkDir; @@ -69,11 +70,52 @@ fn extract_conversation_id(path: &std::path::Path) -> String { .to_string() } +fn last_user_image_count(path: &std::path::Path) -> usize { + let content = std::fs::read_to_string(path).unwrap_or_default(); + let mut last_count = 0; + for line in content.lines() { + if line.trim().is_empty() { + continue; + } + let Ok(item): Result = serde_json::from_str(line) else { + continue; + }; + if item.get("type").and_then(|t| t.as_str()) != Some("response_item") { + continue; + } + let 
Some(payload) = item.get("payload") else { + continue; + }; + if payload.get("type").and_then(|t| t.as_str()) != Some("message") { + continue; + } + if payload.get("role").and_then(|r| r.as_str()) != Some("user") { + continue; + } + let Some(content_items) = payload.get("content").and_then(|v| v.as_array()) else { + continue; + }; + last_count = content_items + .iter() + .filter(|entry| entry.get("type").and_then(|t| t.as_str()) == Some("input_image")) + .count(); + } + last_count +} + +fn exec_fixture() -> anyhow::Result { + Ok(find_resource!("tests/fixtures/cli_responses_fixture.sse")?) +} + +fn exec_repo_root() -> anyhow::Result { + Ok(find_resource!(".")?) +} + #[test] fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> { let test = test_codex_exec(); - let fixture = - Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse"); + let fixture = exec_fixture()?; + let repo_root = exec_repo_root()?; // 1) First run: create a session with a unique marker in the content. 
let marker = format!("resume-last-{}", Uuid::new_v4()); @@ -84,7 +126,7 @@ fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> { .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt) .assert() .success(); @@ -103,7 +145,7 @@ fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> { .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt2) .arg("resume") .arg("--last") @@ -126,8 +168,8 @@ fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> { #[test] fn exec_resume_last_accepts_prompt_after_flag_in_json_mode() -> anyhow::Result<()> { let test = test_codex_exec(); - let fixture = - Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse"); + let fixture = exec_fixture()?; + let repo_root = exec_repo_root()?; // 1) First run: create a session with a unique marker in the content. let marker = format!("resume-last-json-{}", Uuid::new_v4()); @@ -138,7 +180,7 @@ fn exec_resume_last_accepts_prompt_after_flag_in_json_mode() -> anyhow::Result<( .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt) .assert() .success(); @@ -157,7 +199,7 @@ fn exec_resume_last_accepts_prompt_after_flag_in_json_mode() -> anyhow::Result<( .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg("--json") .arg("resume") .arg("--last") @@ -177,11 +219,45 @@ fn exec_resume_last_accepts_prompt_after_flag_in_json_mode() -> anyhow::Result<( Ok(()) } +#[test] +fn exec_resume_accepts_global_flags_after_subcommand() -> anyhow::Result<()> { + let test = test_codex_exec(); + let fixture = exec_fixture()?; + + // Seed a session. 
+ test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("--skip-git-repo-check") + .arg("echo seed-resume-session") + .assert() + .success(); + + // Resume while passing global flags after the subcommand to ensure clap accepts them. + test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("resume") + .arg("--last") + .arg("--json") + .arg("--model") + .arg("gpt-5.2-codex") + .arg("--config") + .arg("reasoning_level=xhigh") + .arg("--dangerously-bypass-approvals-and-sandbox") + .arg("--skip-git-repo-check") + .arg("echo resume-with-global-flags-after-subcommand") + .assert() + .success(); + + Ok(()) +} + #[test] fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> { let test = test_codex_exec(); - let fixture = - Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse"); + let fixture = exec_fixture()?; + let repo_root = exec_repo_root()?; // 1) First run: create a session let marker = format!("resume-by-id-{}", Uuid::new_v4()); @@ -192,7 +268,7 @@ fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> { .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt) .assert() .success(); @@ -215,7 +291,7 @@ fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> { .env("OPENAI_BASE_URL", "http://unused.local") .arg("--skip-git-repo-check") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt2) .arg("resume") .arg(&session_id) @@ -237,8 +313,8 @@ fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> { #[test] fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> { let test = test_codex_exec(); - let fixture = - Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse"); + let fixture = exec_fixture()?; + 
let repo_root = exec_repo_root()?; let marker = format!("resume-config-{}", Uuid::new_v4()); let prompt = format!("echo {marker}"); @@ -252,7 +328,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> { .arg("--model") .arg("gpt-5.1") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt) .assert() .success(); @@ -274,7 +350,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> { .arg("--model") .arg("gpt-5.1-high") .arg("-C") - .arg(env!("CARGO_MANIFEST_DIR")) + .arg(&repo_root) .arg(&prompt2) .arg("resume") .arg("--last") @@ -309,3 +385,64 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> { assert!(content.contains(&marker2)); Ok(()) } + +#[test] +fn exec_resume_accepts_images_after_subcommand() -> anyhow::Result<()> { + let test = test_codex_exec(); + let fixture = exec_fixture()?; + let repo_root = exec_repo_root()?; + + let marker = format!("resume-image-{}", Uuid::new_v4()); + let prompt = format!("echo {marker}"); + + test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("--skip-git-repo-check") + .arg("-C") + .arg(&repo_root) + .arg(&prompt) + .assert() + .success(); + + let image_path = test.cwd_path().join("resume_image.png"); + let image_path_2 = test.cwd_path().join("resume_image_2.png"); + let image_bytes: &[u8] = &[ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, + 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, + 0x15, 0xC4, 0x89, 0x00, 0x00, 0x00, 0x0A, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9C, 0x63, 0x00, + 0x01, 0x00, 0x00, 0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x49, + 0x45, 0x4E, 0x44, 0xAE, 0x42, 0x60, 0x82, + ]; + std::fs::write(&image_path, image_bytes)?; + std::fs::write(&image_path_2, image_bytes)?; + + let marker2 = format!("resume-image-2-{}", Uuid::new_v4()); + let 
prompt2 = format!("echo {marker2}"); + test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("--skip-git-repo-check") + .arg("-C") + .arg(&repo_root) + .arg("resume") + .arg("--last") + .arg("--image") + .arg(&image_path) + .arg("--image") + .arg(&image_path_2) + .arg(&prompt2) + .assert() + .success(); + + let sessions_dir = test.home_path().join("sessions"); + let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2) + .expect("no session file found after resume with images"); + let image_count = last_user_image_count(&resumed_path); + assert_eq!( + image_count, 2, + "resume prompt should include both attached images" + ); + + Ok(()) +} diff --git a/codex-rs/execpolicy/README.md b/codex-rs/execpolicy/README.md index 288a46dcbc6..30fc57184bb 100644 --- a/codex-rs/execpolicy/README.md +++ b/codex-rs/execpolicy/README.md @@ -1,52 +1,65 @@ # codex-execpolicy ## Overview -- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, match?, not_match?)`. + +- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, justification?, match?, not_match?)`. - This release covers the prefix-rule subset of the execpolicy language; a richer language will follow. - Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`. +- `justification` is an optional human-readable rationale for why a rule exists. It can be provided for any `decision` and may be surfaced in different contexts (for example, in approval prompts or rejection messages). When `decision = "forbidden"` is used, include a recommended alternative in the `justification`, when appropriate (e.g., ``"Use `jj` instead of `git`."``). 
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`). - The CLI always prints the JSON serialization of the evaluation result. - The legacy rule matcher lives in `codex-execpolicy-legacy`. ## Policy shapes + - Prefix rules use Starlark syntax: + ```starlark prefix_rule( pattern = ["cmd", ["alt1", "alt2"]], # ordered tokens; list entries denote alternatives decision = "prompt", # allow | prompt | forbidden; defaults to allow + justification = "explain why this rule exists", match = [["cmd", "alt1"], "cmd alt2"], # examples that must match this rule not_match = [["cmd", "oops"], "cmd alt3"], # examples that must not match this rule ) ``` ## CLI + - From the Codex CLI, run `codex execpolicy check` subcommand with one or more policy files (for example `src/default.rules`) to check a command: + ```bash codex execpolicy check --rules path/to/policy.rules git status ``` + - Pass multiple `--rules` flags to merge rules, evaluated in the order provided, and use `--pretty` for formatted JSON. - You can also run the standalone dev binary directly during development: + ```bash cargo run -p codex-execpolicy -- check --rules path/to/policy.rules git status ``` + - Example outcomes: - Match: `{"matchedRules":[{...}],"decision":"allow"}` - No match: `{"matchedRules":[]}` ## Response shape + ```json { "matchedRules": [ { "prefixRuleMatch": { "matchedPrefix": ["", "..."], - "decision": "allow|prompt|forbidden" + "decision": "allow|prompt|forbidden", + "justification": "..." } } ], "decision": "allow|prompt|forbidden" } ``` + - When no rules match, `matchedRules` is an empty array and `decision` is omitted. - `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched. - The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`). 
diff --git a/codex-rs/execpolicy/examples/example.codexpolicy b/codex-rs/execpolicy/examples/example.codexpolicy index 5bb691b6f77..4040f612092 100644 --- a/codex-rs/execpolicy/examples/example.codexpolicy +++ b/codex-rs/execpolicy/examples/example.codexpolicy @@ -4,6 +4,7 @@ prefix_rule( pattern = ["git", "reset", "--hard"], decision = "forbidden", + justification = "destructive operation", match = [ ["git", "reset", "--hard"], ], diff --git a/codex-rs/execpolicy/src/error.rs b/codex-rs/execpolicy/src/error.rs index 2f168a027ec..9664e71a5cf 100644 --- a/codex-rs/execpolicy/src/error.rs +++ b/codex-rs/execpolicy/src/error.rs @@ -11,6 +11,8 @@ pub enum Error { InvalidPattern(String), #[error("invalid example: {0}")] InvalidExample(String), + #[error("invalid rule: {0}")] + InvalidRule(String), #[error( "expected every example to match at least one rule. rules: {rules:?}; unmatched examples: \ {examples:?}" diff --git a/codex-rs/execpolicy/src/parser.rs b/codex-rs/execpolicy/src/parser.rs index d5054905547..0ff0f4b34a7 100644 --- a/codex-rs/execpolicy/src/parser.rs +++ b/codex-rs/execpolicy/src/parser.rs @@ -212,6 +212,7 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { decision: Option<&'v str>, r#match: Option>>, not_match: Option>>, + justification: Option<&'v str>, eval: &mut Evaluator<'v, '_, '_>, ) -> anyhow::Result { let decision = match decision { @@ -219,6 +220,14 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { None => Decision::Allow, }; + let justification = match justification { + Some(raw) if raw.trim().is_empty() => { + return Err(Error::InvalidRule("justification cannot be empty".to_string()).into()); + } + Some(raw) => Some(raw.to_string()), + None => None, + }; + let pattern_tokens = parse_pattern(pattern)?; let matches: Vec> = @@ -246,6 +255,7 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { rest: rest.clone(), }, decision, + justification: justification.clone(), }) as RuleRef }) .collect(); diff --git 
a/codex-rs/execpolicy/src/policy.rs b/codex-rs/execpolicy/src/policy.rs index 991e904ae96..0c06d572e4b 100644 --- a/codex-rs/execpolicy/src/policy.rs +++ b/codex-rs/execpolicy/src/policy.rs @@ -46,6 +46,7 @@ impl Policy { .into(), }, decision, + justification: None, }); self.rules_by_program.insert(first_token.clone(), rule); diff --git a/codex-rs/execpolicy/src/rule.rs b/codex-rs/execpolicy/src/rule.rs index cd0756bbb30..de78a5fda91 100644 --- a/codex-rs/execpolicy/src/rule.rs +++ b/codex-rs/execpolicy/src/rule.rs @@ -63,6 +63,12 @@ pub enum RuleMatch { #[serde(rename = "matchedPrefix")] matched_prefix: Vec, decision: Decision, + /// Optional rationale for why this rule exists. + /// + /// This can be supplied for any decision and may be surfaced in different contexts + /// (e.g., prompt reasons or rejection messages). + #[serde(skip_serializing_if = "Option::is_none")] + justification: Option, }, HeuristicsRuleMatch { command: Vec, @@ -83,6 +89,7 @@ impl RuleMatch { pub struct PrefixRule { pub pattern: PrefixPattern, pub decision: Decision, + pub justification: Option, } pub trait Rule: Any + Debug + Send + Sync { @@ -104,6 +111,7 @@ impl Rule for PrefixRule { .map(|matched_prefix| RuleMatch::PrefixRuleMatch { matched_prefix, decision: self.decision, + justification: self.justification.clone(), }) } } diff --git a/codex-rs/execpolicy/tests/basic.rs b/codex-rs/execpolicy/tests/basic.rs index 7ae5e6e2138..ed6cf3185ee 100644 --- a/codex-rs/execpolicy/tests/basic.rs +++ b/codex-rs/execpolicy/tests/basic.rs @@ -64,6 +64,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "status"]), decision: Decision::Allow, + justification: None, }], }, evaluation @@ -71,6 +72,84 @@ prefix_rule( Ok(()) } +#[test] +fn justification_is_attached_to_forbidden_matches() -> Result<()> { + let policy_src = r#" +prefix_rule( + pattern = ["rm"], + decision = "forbidden", + justification = "destructive command", +) + "#; + let mut parser = 
PolicyParser::new(); + parser.parse("test.rules", policy_src)?; + let policy = parser.build(); + + let evaluation = policy.check( + &tokens(&["rm", "-rf", "/some/important/folder"]), + &allow_all, + ); + assert_eq!( + Evaluation { + decision: Decision::Forbidden, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: tokens(&["rm"]), + decision: Decision::Forbidden, + justification: Some("destructive command".to_string()), + }], + }, + evaluation + ); + Ok(()) +} + +#[test] +fn justification_can_be_used_with_allow_decision() -> Result<()> { + let policy_src = r#" +prefix_rule( + pattern = ["ls"], + decision = "allow", + justification = "safe and commonly used", +) + "#; + let mut parser = PolicyParser::new(); + parser.parse("test.rules", policy_src)?; + let policy = parser.build(); + + let evaluation = policy.check(&tokens(&["ls", "-l"]), &prompt_all); + assert_eq!( + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: tokens(&["ls"]), + decision: Decision::Allow, + justification: Some("safe and commonly used".to_string()), + }], + }, + evaluation + ); + Ok(()) +} + +#[test] +fn justification_cannot_be_empty() { + let policy_src = r#" +prefix_rule( + pattern = ["ls"], + decision = "prompt", + justification = " ", +) + "#; + let mut parser = PolicyParser::new(); + let err = parser + .parse("test.rules", policy_src) + .expect_err("expected parse error"); + assert!( + err.to_string() + .contains("invalid rule: justification cannot be empty") + ); +} + #[test] fn add_prefix_rule_extends_policy() -> Result<()> { let mut policy = Policy::empty(); @@ -84,17 +163,19 @@ fn add_prefix_rule_extends_policy() -> Result<()> { rest: vec![PatternToken::Single(String::from("-l"))].into(), }, decision: Decision::Prompt, + justification: None, })], rules ); - let evaluation = policy.check(&tokens(&["ls", "-l", "/tmp"]), &allow_all); + let evaluation = policy.check(&tokens(&["ls", "-l", 
"/some/important/folder"]), &allow_all); assert_eq!( Evaluation { decision: Decision::Prompt, matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["ls", "-l"]), decision: Decision::Prompt, + justification: None, }], }, evaluation @@ -142,6 +223,7 @@ prefix_rule( rest: Vec::::new().into(), }, decision: Decision::Prompt, + justification: None, }), RuleSnapshot::Prefix(PrefixRule { pattern: PrefixPattern { @@ -149,6 +231,7 @@ prefix_rule( rest: vec![PatternToken::Single("commit".to_string())].into(), }, decision: Decision::Forbidden, + justification: None, }), ], git_rules @@ -161,6 +244,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + justification: None, }], }, status_eval @@ -174,10 +258,12 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + justification: None, }, ], }, @@ -211,6 +297,7 @@ prefix_rule( rest: vec![PatternToken::Alts(vec!["-c".to_string(), "-l".to_string()])].into(), }, decision: Decision::Allow, + justification: None, })], bash_rules ); @@ -221,6 +308,7 @@ prefix_rule( rest: vec![PatternToken::Alts(vec!["-c".to_string(), "-l".to_string()])].into(), }, decision: Decision::Allow, + justification: None, })], sh_rules ); @@ -232,6 +320,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["bash", "-c"]), decision: Decision::Allow, + justification: None, }], }, bash_eval @@ -244,6 +333,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["sh", "-l"]), decision: Decision::Allow, + justification: None, }], }, sh_eval @@ -277,6 +367,7 @@ prefix_rule( .into(), }, decision: Decision::Allow, + justification: None, })], rules ); @@ -288,6 +379,7 @@ prefix_rule( matched_rules: 
vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["npm", "i", "--legacy-peer-deps"]), decision: Decision::Allow, + justification: None, }], }, npm_i @@ -303,6 +395,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["npm", "install", "--no-save"]), decision: Decision::Allow, + justification: None, }], }, npm_install @@ -332,6 +425,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "status"]), decision: Decision::Allow, + justification: None, }], }, match_eval @@ -378,10 +472,12 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + justification: None, }, ], }, @@ -419,14 +515,17 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + justification: None, }, ], }, diff --git a/codex-rs/feedback/src/lib.rs b/codex-rs/feedback/src/lib.rs index 2096f4505b6..4a227fe0930 100644 --- a/codex-rs/feedback/src/lib.rs +++ b/codex-rs/feedback/src/lib.rs @@ -11,7 +11,7 @@ use std::time::Duration; use anyhow::Result; use anyhow::anyhow; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::protocol::SessionSource; use tracing::Event; use tracing::Level; @@ -88,7 +88,7 @@ impl CodexFeedback { .with_filter(Targets::new().with_target(FEEDBACK_TAGS_TARGET, Level::TRACE)) } - pub fn snapshot(&self, session_id: Option) -> CodexLogSnapshot { + pub fn snapshot(&self, session_id: Option) -> CodexLogSnapshot { let bytes = { let guard = self.inner.ring.lock().expect("mutex poisoned"); 
guard.snapshot_bytes() @@ -102,7 +102,7 @@ impl CodexFeedback { tags, thread_id: session_id .map(|id| id.to_string()) - .unwrap_or("no-active-thread-".to_string() + &ConversationId::new().to_string()), + .unwrap_or("no-active-thread-".to_string() + &ThreadId::new().to_string()), } } } diff --git a/codex-rs/login/src/device_code_auth.rs b/codex-rs/login/src/device_code_auth.rs index d9e7d90ce28..5864ff65d9e 100644 --- a/codex-rs/login/src/device_code_auth.rs +++ b/codex-rs/login/src/device_code_auth.rs @@ -137,26 +137,27 @@ async fn poll_for_token( } } -fn print_device_code_prompt(code: &str) { +fn print_device_code_prompt(code: &str, issuer_base_url: &str) { println!( "\nWelcome to Codex [v{ANSI_GRAY}{version}{ANSI_RESET}]\n{ANSI_GRAY}OpenAI's command-line coding agent{ANSI_RESET}\n\ \nFollow these steps to sign in with ChatGPT using device code authorization:\n\ -\n1. Open this link in your browser and sign in to your account\n {ANSI_BLUE}https://auth.openai.com/codex/device{ANSI_RESET}\n\ +\n1. Open this link in your browser and sign in to your account\n {ANSI_BLUE}{issuer_base_url}/codex/device{ANSI_RESET}\n\ \n2. Enter this one-time code {ANSI_GRAY}(expires in 15 minutes){ANSI_RESET}\n {ANSI_BLUE}{code}{ANSI_RESET}\n\ \n{ANSI_GRAY}Device codes are a common phishing target. Never share this code.{ANSI_RESET}\n", version = env!("CARGO_PKG_VERSION"), - code = code + code = code, + issuer_base_url = issuer_base_url ); } /// Full device code login flow. 
pub async fn run_device_code_login(opts: ServerOptions) -> std::io::Result<()> { let client = reqwest::Client::new(); - let base_url = opts.issuer.trim_end_matches('/'); - let api_base_url = format!("{}/api/accounts", opts.issuer.trim_end_matches('/')); + let issuer_base_url = opts.issuer.trim_end_matches('/'); + let api_base_url = format!("{issuer_base_url}/api/accounts"); let uc = request_user_code(&client, &api_base_url, &opts.client_id).await?; - print_device_code_prompt(&uc.user_code); + print_device_code_prompt(&uc.user_code, issuer_base_url); let code_resp = poll_for_token( &client, @@ -171,10 +172,10 @@ pub async fn run_device_code_login(opts: ServerOptions) -> std::io::Result<()> { code_verifier: code_resp.code_verifier, code_challenge: code_resp.code_challenge, }; - let redirect_uri = format!("{base_url}/deviceauth/callback"); + let redirect_uri = format!("{issuer_base_url}/deviceauth/callback"); let tokens = crate::server::exchange_code_for_tokens( - base_url, + issuer_base_url, &opts.client_id, &redirect_uri, &pkce, diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index 16d38da45e7..26fdc073206 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -21,7 +21,7 @@ pub struct CodexToolCallParam { /// The *initial user prompt* to start the Codex conversation. pub prompt: String, - /// Optional override for the model name (e.g. "o3", "o4-mini"). + /// Optional override for the model name (e.g. 'gpt-5.2', 'gpt-5.2-codex'). #[serde(default, skip_serializing_if = "Option::is_none")] pub model: Option, @@ -245,11 +245,8 @@ mod tests { let tool = create_tool_for_codex_tool_call_param(); let tool_json = serde_json::to_value(&tool).expect("tool serializes"); let expected_tool_json = serde_json::json!({ - "name": "codex", - "title": "Codex", "description": "Run a Codex session. 
Accepts configuration parameters matching the Codex Config struct.", "inputSchema": { - "type": "object", "properties": { "approval-policy": { "description": "Approval policy for shell commands generated by the model: `untrusted`, `on-failure`, `on-request`, `never`.", @@ -261,26 +258,29 @@ mod tests { ], "type": "string" }, - "sandbox": { - "description": "Sandbox mode: `read-only`, `workspace-write`, or `danger-full-access`.", - "enum": [ - "read-only", - "workspace-write", - "danger-full-access" - ], + "base-instructions": { + "description": "The set of instructions to use instead of the default ones.", + "type": "string" + }, + "compact-prompt": { + "description": "Prompt used when compacting the conversation.", "type": "string" }, "config": { - "description": "Individual config settings that will override what is in CODEX_HOME/config.toml.", "additionalProperties": true, + "description": "Individual config settings that will override what is in CODEX_HOME/config.toml.", "type": "object" }, "cwd": { "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.", "type": "string" }, + "developer-instructions": { + "description": "Developer instructions that should be injected as a developer role message.", + "type": "string" + }, "model": { - "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").", + "description": "Optional override for the model name (e.g. 
'gpt-5.2', 'gpt-5.2-codex').", "type": "string" }, "profile": { @@ -291,23 +291,23 @@ mod tests { "description": "The *initial user prompt* to start the Codex conversation.", "type": "string" }, - "base-instructions": { - "description": "The set of instructions to use instead of the default ones.", - "type": "string" - }, - "developer-instructions": { - "description": "Developer instructions that should be injected as a developer role message.", - "type": "string" - }, - "compact-prompt": { - "description": "Prompt used when compacting the conversation.", + "sandbox": { + "description": "Sandbox mode: `read-only`, `workspace-write`, or `danger-full-access`.", + "enum": [ + "read-only", + "workspace-write", + "danger-full-access" + ], "type": "string" - }, + } }, "required": [ "prompt" - ] - } + ], + "type": "object" + }, + "name": "codex", + "title": "Codex" }); assert_eq!(expected_tool_json, tool_json); } diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index 39ae7486e55..73b75dcbf83 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -9,9 +9,9 @@ use crate::exec_approval::handle_exec_approval_request; use crate::outgoing_message::OutgoingMessageSender; use crate::outgoing_message::OutgoingNotificationMeta; use crate::patch_approval::handle_patch_approval_request; -use codex_core::CodexConversation; -use codex_core::ConversationManager; -use codex_core::NewConversation; +use codex_core::CodexThread; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::config::Config as CodexConfig; use codex_core::protocol::AgentMessageEvent; use codex_core::protocol::ApplyPatchApprovalRequestEvent; @@ -21,7 +21,7 @@ use codex_core::protocol::ExecApprovalRequestEvent; use codex_core::protocol::Op; use codex_core::protocol::Submission; use codex_core::protocol::TaskCompleteEvent; -use codex_protocol::ConversationId; +use 
codex_protocol::ThreadId; use codex_protocol::user_input::UserInput; use mcp_types::CallToolResult; use mcp_types::ContentBlock; @@ -41,14 +41,14 @@ pub async fn run_codex_tool_session( initial_prompt: String, config: CodexConfig, outgoing: Arc, - conversation_manager: Arc, - running_requests_id_to_codex_uuid: Arc>>, + thread_manager: Arc, + running_requests_id_to_codex_uuid: Arc>>, ) { - let NewConversation { - conversation_id, - conversation, + let NewThread { + thread_id, + thread, session_configured, - } = match conversation_manager.new_conversation(config).await { + } = match thread_manager.start_thread(config).await { Ok(res) => res, Err(e) => { let result = CallToolResult { @@ -87,39 +87,34 @@ pub async fn run_codex_tool_session( running_requests_id_to_codex_uuid .lock() .await - .insert(id.clone(), conversation_id); + .insert(id.clone(), thread_id); let submission = Submission { id: sub_id.clone(), op: Op::UserInput { items: vec![UserInput::Text { text: initial_prompt.clone(), }], + final_output_json_schema: None, }, }; - if let Err(e) = conversation.submit_with_id(submission).await { + if let Err(e) = thread.submit_with_id(submission).await { tracing::error!("Failed to submit initial prompt: {e}"); // unregister the id so we don't keep it in the map running_requests_id_to_codex_uuid.lock().await.remove(&id); return; } - run_codex_tool_session_inner( - conversation, - outgoing, - id, - running_requests_id_to_codex_uuid, - ) - .await; + run_codex_tool_session_inner(thread, outgoing, id, running_requests_id_to_codex_uuid).await; } pub async fn run_codex_tool_session_reply( - conversation: Arc, + conversation: Arc, outgoing: Arc, request_id: RequestId, prompt: String, - running_requests_id_to_codex_uuid: Arc>>, - conversation_id: ConversationId, + running_requests_id_to_codex_uuid: Arc>>, + conversation_id: ThreadId, ) { running_requests_id_to_codex_uuid .lock() @@ -128,6 +123,7 @@ pub async fn run_codex_tool_session_reply( if let Err(e) = conversation 
.submit(Op::UserInput { items: vec![UserInput::Text { text: prompt }], + final_output_json_schema: None, }) .await { @@ -150,10 +146,10 @@ pub async fn run_codex_tool_session_reply( } async fn run_codex_tool_session_inner( - codex: Arc, + codex: Arc, outgoing: Arc, request_id: RequestId, - running_requests_id_to_codex_uuid: Arc>>, + running_requests_id_to_codex_uuid: Arc>>, ) { let request_id_str = match &request_id { RequestId::String(s) => s.clone(), @@ -309,6 +305,7 @@ async fn run_codex_tool_session_inner( | EventMsg::UndoCompleted(_) | EventMsg::ExitedReviewMode(_) | EventMsg::ContextCompacted(_) + | EventMsg::ThreadRolledBack(_) | EventMsg::DeprecationNotice(_) => { // For now, we do not do anything extra for these // events. Note that diff --git a/codex-rs/mcp-server/src/exec_approval.rs b/codex-rs/mcp-server/src/exec_approval.rs index 44607b754d7..47f52caf7fa 100644 --- a/codex-rs/mcp-server/src/exec_approval.rs +++ b/codex-rs/mcp-server/src/exec_approval.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use std::sync::Arc; -use codex_core::CodexConversation; +use codex_core::CodexThread; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; use codex_protocol::parse_command::ParsedCommand; @@ -53,7 +53,7 @@ pub(crate) async fn handle_exec_approval_request( command: Vec, cwd: PathBuf, outgoing: Arc, - codex: Arc, + codex: Arc, request_id: RequestId, tool_call_id: String, event_id: String, @@ -120,7 +120,7 @@ pub(crate) async fn handle_exec_approval_request( async fn on_exec_approval_response( event_id: String, receiver: tokio::sync::oneshot::Receiver, - codex: Arc, + codex: Arc, ) { let response = receiver.await; let value = match response { diff --git a/codex-rs/mcp-server/src/message_processor.rs b/codex-rs/mcp-server/src/message_processor.rs index 81eb80764be..dcf5411a091 100644 --- a/codex-rs/mcp-server/src/message_processor.rs +++ b/codex-rs/mcp-server/src/message_processor.rs @@ -7,11 +7,11 @@ use 
crate::codex_tool_config::create_tool_for_codex_tool_call_param; use crate::codex_tool_config::create_tool_for_codex_tool_call_reply_param; use crate::error_code::INVALID_REQUEST_ERROR_CODE; use crate::outgoing_message::OutgoingMessageSender; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::protocol::SessionSource; use codex_core::AuthManager; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_core::default_client::USER_AGENT_SUFFIX; use codex_core::default_client::get_codex_user_agent; @@ -40,8 +40,8 @@ pub(crate) struct MessageProcessor { outgoing: Arc, initialized: bool, codex_linux_sandbox_exe: Option, - conversation_manager: Arc, - running_requests_id_to_codex_uuid: Arc>>, + thread_manager: Arc, + running_requests_id_to_codex_uuid: Arc>>, } impl MessageProcessor { @@ -58,13 +58,16 @@ impl MessageProcessor { false, config.cli_auth_credentials_store_mode, ); - let conversation_manager = - Arc::new(ConversationManager::new(auth_manager, SessionSource::Mcp)); + let thread_manager = Arc::new(ThreadManager::new( + config.codex_home.clone(), + auth_manager, + SessionSource::Mcp, + )); Self { outgoing, initialized: false, codex_linux_sandbox_exe, - conversation_manager, + thread_manager, running_requests_id_to_codex_uuid: Arc::new(Mutex::new(HashMap::new())), } } @@ -403,7 +406,7 @@ impl MessageProcessor { // Clone outgoing and server to move into async task. 
let outgoing = self.outgoing.clone(); - let conversation_manager = self.conversation_manager.clone(); + let thread_manager = self.thread_manager.clone(); let running_requests_id_to_codex_uuid = self.running_requests_id_to_codex_uuid.clone(); // Spawn an async task to handle the Codex session so that we do not @@ -415,7 +418,7 @@ impl MessageProcessor { initial_prompt, config, outgoing, - conversation_manager, + thread_manager, running_requests_id_to_codex_uuid, ) .await; @@ -470,7 +473,7 @@ impl MessageProcessor { return; } }; - let conversation_id = match ConversationId::from_string(&conversation_id) { + let conversation_id = match ThreadId::from_string(&conversation_id) { Ok(id) => id, Err(e) => { tracing::error!("Failed to parse conversation_id: {e}"); @@ -493,11 +496,7 @@ impl MessageProcessor { let outgoing = self.outgoing.clone(); let running_requests_id_to_codex_uuid = self.running_requests_id_to_codex_uuid.clone(); - let codex = match self - .conversation_manager - .get_conversation(conversation_id) - .await - { + let codex = match self.thread_manager.get_thread(conversation_id).await { Ok(c) => c, Err(_) => { tracing::warn!("Session not found for conversation_id: {conversation_id}"); @@ -578,11 +577,7 @@ impl MessageProcessor { tracing::info!("conversation_id: {conversation_id}"); // Obtain the Codex conversation from the server. 
- let codex_arc = match self - .conversation_manager - .get_conversation(conversation_id) - .await - { + let codex_arc = match self.thread_manager.get_thread(conversation_id).await { Ok(c) => c, Err(_) => { tracing::warn!("Session not found for conversation_id: {conversation_id}"); diff --git a/codex-rs/mcp-server/src/outgoing_message.rs b/codex-rs/mcp-server/src/outgoing_message.rs index 83ac25fdfd4..fef5c8bac7f 100644 --- a/codex-rs/mcp-server/src/outgoing_message.rs +++ b/codex-rs/mcp-server/src/outgoing_message.rs @@ -238,7 +238,7 @@ mod tests { use codex_core::protocol::EventMsg; use codex_core::protocol::SandboxPolicy; use codex_core::protocol::SessionConfiguredEvent; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; use codex_protocol::openai_models::ReasoningEffort; use pretty_assertions::assert_eq; use serde_json::json; @@ -251,7 +251,7 @@ mod tests { let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::(); let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let rollout_file = NamedTempFile::new()?; let event = Event { id: "1".to_string(), @@ -292,7 +292,7 @@ mod tests { let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::(); let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx); - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let rollout_file = NamedTempFile::new()?; let session_configured_event = SessionConfiguredEvent { session_id: conversation_id, diff --git a/codex-rs/mcp-server/src/patch_approval.rs b/codex-rs/mcp-server/src/patch_approval.rs index 3c614ab3317..00e4f204afd 100644 --- a/codex-rs/mcp-server/src/patch_approval.rs +++ b/codex-rs/mcp-server/src/patch_approval.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; -use codex_core::CodexConversation; +use codex_core::CodexThread; use 
codex_core::protocol::FileChange; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; @@ -47,7 +47,7 @@ pub(crate) async fn handle_patch_approval_request( grant_root: Option, changes: HashMap, outgoing: Arc, - codex: Arc, + codex: Arc, request_id: RequestId, tool_call_id: String, event_id: String, @@ -111,7 +111,7 @@ pub(crate) async fn handle_patch_approval_request( pub(crate) async fn on_patch_approval_response( event_id: String, receiver: tokio::sync::oneshot::Receiver, - codex: Arc, + codex: Arc, ) { let response = receiver.await; let value = match response { diff --git a/codex-rs/mcp-types/schema/2025-03-26/schema.json b/codex-rs/mcp-types/schema/2025-03-26/schema.json index a1e3f26799d..328ff95f4b8 100644 --- a/codex-rs/mcp-types/schema/2025-03-26/schema.json +++ b/codex-rs/mcp-types/schema/2025-03-26/schema.json @@ -1167,7 +1167,7 @@ "description": "Hints to use for model selection.\n\nKeys not declared here are currently left unspecified by the spec and are up\nto the client to interpret.", "properties": { "name": { - "description": "A hint for a model name.\n\nThe client SHOULD treat this as a substring of a model name; for example:\n - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`\n - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.\n - `claude` should match any Claude model\n\nThe client MAY also map the string to a different provider's model name or a different model family, as long as it fills a similar niche; for example:\n - `gemini-1.5-flash` could match `claude-3-haiku-20240307`", + "description": "A hint for a model name.\n\nThe client SHOULD treat this as a substring of a model name; for example:\n - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`\n - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.\n - `claude` should match any Claude model\n\nThe client MAY also map the string to a different provider's model name or a 
different model info, as long as it fills a similar niche; for example:\n - `gemini-1.5-flash` could match `claude-3-haiku-20240307`", "type": "string" } }, @@ -2136,4 +2136,3 @@ } } } - diff --git a/codex-rs/mcp-types/schema/2025-06-18/schema.json b/codex-rs/mcp-types/schema/2025-06-18/schema.json index 24ba4f6309f..d5faee82cdb 100644 --- a/codex-rs/mcp-types/schema/2025-06-18/schema.json +++ b/codex-rs/mcp-types/schema/2025-06-18/schema.json @@ -1334,7 +1334,7 @@ "description": "Hints to use for model selection.\n\nKeys not declared here are currently left unspecified by the spec and are up\nto the client to interpret.", "properties": { "name": { - "description": "A hint for a model name.\n\nThe client SHOULD treat this as a substring of a model name; for example:\n - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`\n - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.\n - `claude` should match any Claude model\n\nThe client MAY also map the string to a different provider's model name or a different model family, as long as it fills a similar niche; for example:\n - `gemini-1.5-flash` could match `claude-3-haiku-20240307`", + "description": "A hint for a model name.\n\nThe client SHOULD treat this as a substring of a model name; for example:\n - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`\n - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.\n - `claude` should match any Claude model\n\nThe client MAY also map the string to a different provider's model name or a different model info, as long as it fills a similar niche; for example:\n - `gemini-1.5-flash` could match `claude-3-haiku-20240307`", "type": "string" } }, @@ -2514,4 +2514,3 @@ } } } - diff --git a/codex-rs/otel/Cargo.toml b/codex-rs/otel/Cargo.toml index a703808e80b..eb19ec7df78 100644 --- a/codex-rs/otel/Cargo.toml +++ b/codex-rs/otel/Cargo.toml @@ -12,6 +12,13 @@ path = "src/lib.rs" [lints] workspace 
= true +[features] +## Disables the built-in default metrics exporter. +## +## Intended for use from `dev-dependencies` so unit/integration tests never +## attempt to export metrics over the network. +disable-default-metrics-exporter = [] + [dependencies] chrono = { workspace = true } codex-app-server-protocol = { workspace = true } @@ -19,13 +26,14 @@ codex-utils-absolute-path = { workspace = true } codex-api = { workspace = true } codex-protocol = { workspace = true } eventsource-stream = { workspace = true } -opentelemetry = { workspace = true, features = ["logs", "trace"] } +opentelemetry = { workspace = true, features = ["logs", "metrics", "trace"] } opentelemetry-appender-tracing = { workspace = true } opentelemetry-otlp = { workspace = true, features = [ "grpc-tonic", "http-proto", "http-json", "logs", + "metrics", "trace", "reqwest-blocking-client", "reqwest-rustls", @@ -33,16 +41,13 @@ opentelemetry-otlp = { workspace = true, features = [ "tls-roots", ]} opentelemetry-semantic-conventions = { workspace = true } -opentelemetry_sdk = { workspace = true, features = [ - "logs", - "rt-tokio", - "trace", -]} +opentelemetry_sdk = { workspace = true, features = ["logs", "metrics", "rt-tokio", "testing", "trace"] } http = { workspace = true } reqwest = { workspace = true, features = ["blocking", "rustls-tls"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } strum_macros = { workspace = true } +thiserror = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } tracing-opentelemetry = { workspace = true } @@ -50,3 +55,4 @@ tracing-subscriber = { workspace = true } [dev-dependencies] opentelemetry_sdk = { workspace = true, features = ["testing"] } +pretty_assertions = { workspace = true } diff --git a/codex-rs/otel/README.md b/codex-rs/otel/README.md new file mode 100644 index 00000000000..79f4e8f45c9 --- /dev/null +++ b/codex-rs/otel/README.md @@ -0,0 +1,128 @@ +# codex-otel + +`codex-otel` is the 
OpenTelemetry integration crate for Codex. It provides: + +- Trace/log exporters and tracing subscriber layers (`codex_otel::traces::otel_provider`). +- A structured event helper (`codex_otel::OtelManager`). +- OpenTelemetry metrics support via OTLP exporters (`codex_otel::metrics`). +- A metrics facade on `OtelManager` so tracing + metrics share metadata. + +## Tracing and logs + +Create an OTEL provider from `OtelSettings`, then attach its layers to your +`tracing_subscriber` registry: + +```rust +use codex_otel::config::OtelExporter; +use codex_otel::config::OtelHttpProtocol; +use codex_otel::config::OtelSettings; +use codex_otel::traces::otel_provider::OtelProvider; +use tracing_subscriber::prelude::*; + +let settings = OtelSettings { + environment: "dev".to_string(), + service_name: "codex-cli".to_string(), + service_version: env!("CARGO_PKG_VERSION").to_string(), + codex_home: std::path::PathBuf::from("/tmp"), + exporter: OtelExporter::OtlpHttp { + endpoint: "https://otlp.example.com".to_string(), + headers: std::collections::HashMap::new(), + protocol: OtelHttpProtocol::Binary, + tls: None, + }, + trace_exporter: OtelExporter::OtlpHttp { + endpoint: "https://otlp.example.com".to_string(), + headers: std::collections::HashMap::new(), + protocol: OtelHttpProtocol::Binary, + tls: None, + }, + metrics_exporter: OtelExporter::None, +}; + +if let Some(provider) = OtelProvider::from(&settings)? { + let registry = tracing_subscriber::registry() + .with(provider.logger_layer()) + .with(provider.tracing_layer()); + registry.init(); +} +``` + +## OtelManager (events) + +`OtelManager` adds consistent metadata to tracing events and helps record +Codex-specific events. 
+ +```rust +use codex_otel::OtelManager; + +let manager = OtelManager::new( + conversation_id, + model, + slug, + account_id, + account_email, + auth_mode, + log_user_prompts, + terminal_type, + session_source, +); + +manager.user_prompt(&prompt_items); +``` + +## Metrics (OTLP or in-memory) + +Modes: + +- OTLP: exports metrics via the OpenTelemetry OTLP exporter (HTTP or gRPC). +- In-memory: records via `opentelemetry_sdk::metrics::InMemoryMetricExporter` for tests/assertions; call `shutdown()` to flush. + +`codex-otel` also provides `OtelExporter::Statsig`, a shorthand for exporting OTLP/HTTP JSON metrics +to Statsig using Codex-internal defaults. + +Statsig ingestion (OTLP/HTTP JSON) example: + +```rust +use codex_otel::config::{OtelExporter, OtelHttpProtocol}; + +let metrics = MetricsClient::new(MetricsConfig::otlp( + "dev", + "codex-cli", + env!("CARGO_PKG_VERSION"), + OtelExporter::OtlpHttp { + endpoint: "https://api.statsig.com/otlp".to_string(), + headers: std::collections::HashMap::from([( + "statsig-api-key".to_string(), + std::env::var("STATSIG_SERVER_SDK_SECRET")?, + )]), + protocol: OtelHttpProtocol::Json, + tls: None, + }, +))?; + +metrics.counter("codex.session_started", 1, &[("source", "tui")])?; +metrics.histogram("codex.request_latency", 83, &[("route", "chat")])?; +``` + +In-memory (tests): + +```rust +let exporter = InMemoryMetricExporter::default(); +let metrics = MetricsClient::new(MetricsConfig::in_memory( + "test", + "codex-cli", + env!("CARGO_PKG_VERSION"), + exporter.clone(), +))?; +metrics.counter("codex.turns", 1, &[("model", "gpt-5.1")])?; +metrics.shutdown()?; // flushes in-memory exporter +``` + +## Shutdown + +- `OtelProvider::shutdown()` stops the OTEL exporter. +- `OtelManager::shutdown_metrics()` flushes and shuts down the metrics provider. + +Both are optional because drop performs best-effort shutdown, but calling them +explicitly gives deterministic flushing (or a shutdown error if flushing does +not complete in time). 
diff --git a/codex-rs/otel/src/config.rs b/codex-rs/otel/src/config.rs index 935c0379fbe..f8f2d5a1063 100644 --- a/codex-rs/otel/src/config.rs +++ b/codex-rs/otel/src/config.rs @@ -3,6 +3,31 @@ use std::path::PathBuf; use codex_utils_absolute_path::AbsolutePathBuf; +pub(crate) const STATSIG_OTLP_HTTP_ENDPOINT: &str = "https://ab.chatgpt.com/otlp/v1/metrics"; +pub(crate) const STATSIG_API_KEY_HEADER: &str = "statsig-api-key"; +pub(crate) const STATSIG_API_KEY: &str = "client-MkRuleRQBd6qakfnDYqJVR9JuXcY57Ljly3vi5JVUIO"; + +pub(crate) fn resolve_exporter(exporter: &OtelExporter) -> OtelExporter { + match exporter { + OtelExporter::Statsig => { + if cfg!(test) || cfg!(feature = "disable-default-metrics-exporter") { + return OtelExporter::None; + } + + OtelExporter::OtlpHttp { + endpoint: STATSIG_OTLP_HTTP_ENDPOINT.to_string(), + headers: HashMap::from([( + STATSIG_API_KEY_HEADER.to_string(), + STATSIG_API_KEY.to_string(), + )]), + protocol: OtelHttpProtocol::Json, + tls: None, + } + } + _ => exporter.clone(), + } +} + #[derive(Clone, Debug)] pub struct OtelSettings { pub environment: String, @@ -11,6 +36,7 @@ pub struct OtelSettings { pub codex_home: PathBuf, pub exporter: OtelExporter, pub trace_exporter: OtelExporter, + pub metrics_exporter: OtelExporter, } #[derive(Clone, Debug)] @@ -31,6 +57,10 @@ pub struct OtelTlsConfig { #[derive(Clone, Debug)] pub enum OtelExporter { None, + /// Statsig metrics ingestion exporter using Codex-internal defaults. + /// + /// This is intended for metrics only. 
+ Statsig, OtlpGrpc { endpoint: String, headers: HashMap, diff --git a/codex-rs/otel/src/lib.rs b/codex-rs/otel/src/lib.rs index 5211c8e89ba..25607623204 100644 --- a/codex-rs/otel/src/lib.rs +++ b/codex-rs/otel/src/lib.rs @@ -1,4 +1,173 @@ pub mod config; +pub mod metrics; +pub mod traces; -pub mod otel_manager; -pub mod otel_provider; +mod otlp; + +use crate::metrics::MetricsClient; +use crate::metrics::MetricsConfig; +use crate::metrics::MetricsError; +use crate::metrics::Result as MetricsResult; +use crate::metrics::timer::Timer; +use crate::metrics::validation::validate_tag_key; +use crate::metrics::validation::validate_tag_value; +use crate::traces::otel_provider::OtelProvider; +use codex_protocol::ThreadId; +use serde::Serialize; +use std::time::Duration; +use strum_macros::Display; +use tracing::Span; + +#[derive(Debug, Clone, Serialize, Display)] +#[serde(rename_all = "snake_case")] +pub enum ToolDecisionSource { + Config, + User, +} + +#[derive(Debug, Clone)] +pub struct OtelEventMetadata { + pub(crate) conversation_id: ThreadId, + pub(crate) auth_mode: Option, + pub(crate) account_id: Option, + pub(crate) account_email: Option, + pub(crate) model: String, + pub(crate) slug: String, + pub(crate) log_user_prompts: bool, + pub(crate) app_version: &'static str, + pub(crate) terminal_type: String, +} + +#[derive(Debug, Clone)] +pub struct OtelManager { + pub(crate) metadata: OtelEventMetadata, + pub(crate) session_span: Span, + pub(crate) metrics: Option, + pub(crate) metrics_use_metadata_tags: bool, +} + +impl OtelManager { + pub fn with_model(mut self, model: &str, slug: &str) -> Self { + self.metadata.model = model.to_owned(); + self.metadata.slug = slug.to_owned(); + self + } + + pub fn with_metrics(mut self, metrics: MetricsClient) -> Self { + self.metrics = Some(metrics); + self.metrics_use_metadata_tags = true; + self + } + + pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self { + self.metrics = Some(metrics); + 
self.metrics_use_metadata_tags = false; + self + } + + pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult { + let metrics = MetricsClient::new(config)?; + Ok(self.with_metrics(metrics)) + } + + pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self { + match provider.metrics() { + Some(metrics) => self.with_metrics(metrics.clone()), + None => self, + } + } + + pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.counter(name, inc, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics counter [{name}] failed: {e}"); + } + } + + pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.histogram(name, value, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics histogram [{name}] failed: {e}"); + } + } + + pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.record_duration(name, duration, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics duration [{name}] failed: {e}"); + } + } + + pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result { + let Some(metrics) = &self.metrics else { + return Err(MetricsError::ExporterDisabled); + }; + let tags = self.tags_with_metadata(tags)?; + metrics.start_timer(name, &tags) + } + + pub fn shutdown_metrics(&self) -> MetricsResult<()> { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + metrics.shutdown() + } + + fn tags_with_metadata<'a>( + &'a self, + tags: &'a [(&'a str, &'a str)], + ) -> 
MetricsResult> { + let mut merged = self.metadata_tag_refs()?; + merged.extend(tags.iter().copied()); + Ok(merged) + } + + fn metadata_tag_refs(&self) -> MetricsResult> { + if !self.metrics_use_metadata_tags { + return Ok(Vec::new()); + } + let mut tags = Vec::with_capacity(5); + Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?; + Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?; + Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?; + Ok(tags) + } + + fn push_metadata_tag<'a>( + tags: &mut Vec<(&'a str, &'a str)>, + key: &'static str, + value: Option<&'a str>, + ) -> MetricsResult<()> { + let Some(value) = value else { + return Ok(()); + }; + validate_tag_key(key)?; + validate_tag_value(value)?; + tags.push((key, value)); + Ok(()) + } +} diff --git a/codex-rs/otel/src/metrics/client.rs b/codex-rs/otel/src/metrics/client.rs new file mode 100644 index 00000000000..362199d6989 --- /dev/null +++ b/codex-rs/otel/src/metrics/client.rs @@ -0,0 +1,291 @@ +use crate::config::OtelExporter; +use crate::config::OtelHttpProtocol; +use crate::metrics::MetricsError; +use crate::metrics::Result; +use crate::metrics::config::MetricsConfig; +use crate::metrics::config::MetricsExporter; +use crate::metrics::timer::Timer; +use crate::metrics::validation::validate_metric_name; +use crate::metrics::validation::validate_tag_key; +use crate::metrics::validation::validate_tag_value; +use crate::metrics::validation::validate_tags; +use opentelemetry::KeyValue; +use opentelemetry::metrics::Counter; +use opentelemetry::metrics::Histogram; +use opentelemetry::metrics::Meter; +use opentelemetry::metrics::MeterProvider as _; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_METRICS_TIMEOUT; +use opentelemetry_otlp::Protocol; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_otlp::WithHttpConfig; +use opentelemetry_otlp::WithTonicConfig; +use 
opentelemetry_otlp::tonic_types::metadata::MetadataMap; +use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; +use opentelemetry_sdk::Resource; +use opentelemetry_sdk::metrics::PeriodicReader; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_sdk::metrics::Temporality; +use opentelemetry_semantic_conventions as semconv; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::sync::Mutex; +use std::time::Duration; +use tracing::debug; + +const ENV_ATTRIBUTE: &str = "env"; +const METER_NAME: &str = "codex"; + +#[derive(Debug)] +struct MetricsClientInner { + meter_provider: SdkMeterProvider, + meter: Meter, + counters: Mutex>>, + histograms: Mutex>>, + default_tags: BTreeMap, +} + +impl MetricsClientInner { + fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) -> Result<()> { + validate_metric_name(name)?; + if inc < 0 { + return Err(MetricsError::NegativeCounterIncrement { + name: name.to_string(), + inc, + }); + } + let attributes = self.attributes(tags)?; + + let mut counters = self + .counters + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let counter = counters + .entry(name.to_string()) + .or_insert_with(|| self.meter.u64_counter(name.to_string()).build()); + counter.add(inc as u64, &attributes); + Ok(()) + } + + fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) -> Result<()> { + validate_metric_name(name)?; + let attributes = self.attributes(tags)?; + + let mut histograms = self + .histograms + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let histogram = histograms + .entry(name.to_string()) + .or_insert_with(|| self.meter.f64_histogram(name.to_string()).build()); + histogram.record(value as f64, &attributes); + Ok(()) + } + + fn attributes(&self, tags: &[(&str, &str)]) -> Result> { + if tags.is_empty() { + return Ok(self + .default_tags + .iter() + .map(|(key, value)| KeyValue::new(key.clone(), value.clone())) + .collect()); + } + + let mut 
merged = self.default_tags.clone(); + for (key, value) in tags { + validate_tag_key(key)?; + validate_tag_value(value)?; + merged.insert((*key).to_string(), (*value).to_string()); + } + + Ok(merged + .into_iter() + .map(|(key, value)| KeyValue::new(key, value)) + .collect()) + } + + fn shutdown(&self) -> Result<()> { + debug!("flushing OTEL metrics"); + self.meter_provider + .force_flush() + .map_err(|source| MetricsError::ProviderShutdown { source })?; + self.meter_provider + .shutdown() + .map_err(|source| MetricsError::ProviderShutdown { source })?; + Ok(()) + } +} + +/// OpenTelemetry metrics client used by Codex. +#[derive(Clone, Debug)] +pub struct MetricsClient(std::sync::Arc); + +impl MetricsClient { + /// Build a metrics client from configuration and validate defaults. + pub fn new(config: MetricsConfig) -> Result { + validate_tags(&config.default_tags)?; + + let resource = Resource::builder() + .with_service_name(config.service_name.clone()) + .with_attributes(vec![ + KeyValue::new( + semconv::attribute::SERVICE_VERSION, + config.service_version.clone(), + ), + KeyValue::new(ENV_ATTRIBUTE, config.environment.clone()), + ]) + .build(); + + let temporality = Temporality::default(); + let (meter_provider, meter) = match config.exporter { + MetricsExporter::InMemory(exporter) => { + build_provider(resource, exporter, config.export_interval) + } + MetricsExporter::Otlp(exporter) => { + let exporter = build_otlp_metric_exporter(exporter, temporality)?; + build_provider(resource, exporter, config.export_interval) + } + }; + + Ok(Self(std::sync::Arc::new(MetricsClientInner { + meter_provider, + meter, + counters: Mutex::new(HashMap::new()), + histograms: Mutex::new(HashMap::new()), + default_tags: config.default_tags, + }))) + } + + /// Send a single counter increment. + pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) -> Result<()> { + self.0.counter(name, inc, tags) + } + + /// Send a single histogram sample. 
+ pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) -> Result<()> { + self.0.histogram(name, value, tags) + } + + /// Record a duration in milliseconds using a histogram. + pub fn record_duration( + &self, + name: &str, + duration: Duration, + tags: &[(&str, &str)], + ) -> Result<()> { + self.histogram( + name, + duration.as_millis().min(i64::MAX as u128) as i64, + tags, + ) + } + + pub fn start_timer( + &self, + name: &str, + tags: &[(&str, &str)], + ) -> std::result::Result { + Ok(Timer::new(name, tags, self)) + } + + /// Flush metrics and stop the underlying OTEL meter provider. + pub fn shutdown(&self) -> Result<()> { + self.0.shutdown() + } +} + +fn build_provider( + resource: Resource, + exporter: E, + interval: Option, +) -> (SdkMeterProvider, Meter) +where + E: opentelemetry_sdk::metrics::exporter::PushMetricExporter + 'static, +{ + let mut reader_builder = PeriodicReader::builder(exporter); + if let Some(interval) = interval { + reader_builder = reader_builder.with_interval(interval); + } + let reader = reader_builder.build(); + let provider = SdkMeterProvider::builder() + .with_resource(resource) + .with_reader(reader) + .build(); + let meter = provider.meter(METER_NAME); + (provider, meter) +} + +fn build_otlp_metric_exporter( + exporter: OtelExporter, + temporality: Temporality, +) -> Result { + match exporter { + OtelExporter::None => Err(MetricsError::ExporterDisabled), + OtelExporter::Statsig => build_otlp_metric_exporter( + crate::config::resolve_exporter(&OtelExporter::Statsig), + temporality, + ), + OtelExporter::OtlpGrpc { + endpoint, + headers, + tls, + } => { + debug!("Using OTLP Grpc exporter for metrics: {endpoint}"); + + let header_map = crate::otlp::build_header_map(&headers); + + let base_tls_config = ClientTlsConfig::new() + .with_enabled_roots() + .assume_http2(true); + + let tls_config = match tls.as_ref() { + Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls) + .map_err(|err| 
MetricsError::InvalidConfig { + message: err.to_string(), + })?, + None => base_tls_config, + }; + + opentelemetry_otlp::MetricExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_temporality(temporality) + .with_metadata(MetadataMap::from_headers(header_map)) + .with_tls_config(tls_config) + .build() + .map_err(|source| MetricsError::ExporterBuild { source }) + } + OtelExporter::OtlpHttp { + endpoint, + headers, + protocol, + tls, + } => { + debug!("Using OTLP Http exporter for metrics: {endpoint}"); + + let protocol = match protocol { + OtelHttpProtocol::Binary => Protocol::HttpBinary, + OtelHttpProtocol::Json => Protocol::HttpJson, + }; + + let mut exporter_builder = opentelemetry_otlp::MetricExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_temporality(temporality) + .with_protocol(protocol) + .with_headers(headers); + + if let Some(tls) = tls.as_ref() { + let client = + crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_METRICS_TIMEOUT) + .map_err(|err| MetricsError::InvalidConfig { + message: err.to_string(), + })?; + exporter_builder = exporter_builder.with_http_client(client); + } + + exporter_builder + .build() + .map_err(|source| MetricsError::ExporterBuild { source }) + } + } +} diff --git a/codex-rs/otel/src/metrics/config.rs b/codex-rs/otel/src/metrics/config.rs new file mode 100644 index 00000000000..c7a459183be --- /dev/null +++ b/codex-rs/otel/src/metrics/config.rs @@ -0,0 +1,74 @@ +use crate::config::OtelExporter; +use crate::metrics::Result; +use crate::metrics::validation::validate_tag_key; +use crate::metrics::validation::validate_tag_value; +use opentelemetry_sdk::metrics::InMemoryMetricExporter; +use std::collections::BTreeMap; +use std::time::Duration; + +#[derive(Clone, Debug)] +pub enum MetricsExporter { + Otlp(OtelExporter), + InMemory(InMemoryMetricExporter), +} + +#[derive(Clone, Debug)] +pub struct MetricsConfig { + pub(crate) environment: String, + pub(crate) service_name: String, + pub(crate) 
service_version: String, + pub(crate) exporter: MetricsExporter, + pub(crate) export_interval: Option, + pub(crate) default_tags: BTreeMap, +} + +impl MetricsConfig { + pub fn otlp( + environment: impl Into, + service_name: impl Into, + service_version: impl Into, + exporter: OtelExporter, + ) -> Self { + Self { + environment: environment.into(), + service_name: service_name.into(), + service_version: service_version.into(), + exporter: MetricsExporter::Otlp(exporter), + export_interval: None, + default_tags: BTreeMap::new(), + } + } + + /// Create an in-memory config (used in tests). + pub fn in_memory( + environment: impl Into, + service_name: impl Into, + service_version: impl Into, + exporter: InMemoryMetricExporter, + ) -> Self { + Self { + environment: environment.into(), + service_name: service_name.into(), + service_version: service_version.into(), + exporter: MetricsExporter::InMemory(exporter), + export_interval: None, + default_tags: BTreeMap::new(), + } + } + + /// Override the interval between periodic metric exports. + pub fn with_export_interval(mut self, interval: Duration) -> Self { + self.export_interval = Some(interval); + self + } + + /// Add a default tag that will be sent with every metric. + pub fn with_tag(mut self, key: impl Into, value: impl Into) -> Result { + let key = key.into(); + let value = value.into(); + validate_tag_key(&key)?; + validate_tag_value(&value)?; + self.default_tags.insert(key, value); + Ok(self) + } +} diff --git a/codex-rs/otel/src/metrics/error.rs b/codex-rs/otel/src/metrics/error.rs new file mode 100644 index 00000000000..dfb9653254a --- /dev/null +++ b/codex-rs/otel/src/metrics/error.rs @@ -0,0 +1,37 @@ +use thiserror::Error; + +pub type Result = std::result::Result; + +#[derive(Debug, Error)] +pub enum MetricsError { + // Metrics. 
+ #[error("metric name cannot be empty")] + EmptyMetricName, + #[error("metric name contains invalid characters: {name}")] + InvalidMetricName { name: String }, + #[error("{label} cannot be empty")] + EmptyTagComponent { label: String }, + #[error("{label} contains invalid characters: {value}")] + InvalidTagComponent { label: String, value: String }, + + #[error("metrics exporter is disabled")] + ExporterDisabled, + + #[error("counter increment must be non-negative for {name}: {inc}")] + NegativeCounterIncrement { name: String, inc: i64 }, + + #[error("failed to build OTLP metrics exporter")] + ExporterBuild { + #[source] + source: opentelemetry_otlp::ExporterBuildError, + }, + + #[error("invalid OTLP metrics configuration: {message}")] + InvalidConfig { message: String }, + + #[error("failed to flush or shutdown metrics provider")] + ProviderShutdown { + #[source] + source: opentelemetry_sdk::error::OTelSdkError, + }, +} diff --git a/codex-rs/otel/src/metrics/mod.rs b/codex-rs/otel/src/metrics/mod.rs new file mode 100644 index 00000000000..b13d5f917e3 --- /dev/null +++ b/codex-rs/otel/src/metrics/mod.rs @@ -0,0 +1,22 @@ +mod client; +mod config; +mod error; +pub(crate) mod timer; +pub(crate) mod validation; + +pub use crate::metrics::client::MetricsClient; +pub use crate::metrics::config::MetricsConfig; +pub use crate::metrics::config::MetricsExporter; +pub use crate::metrics::error::MetricsError; +pub use crate::metrics::error::Result; +use std::sync::OnceLock; + +static GLOBAL_METRICS: OnceLock = OnceLock::new(); + +pub(crate) fn install_global(metrics: MetricsClient) { + let _ = GLOBAL_METRICS.set(metrics); +} + +pub(crate) fn global() -> Option { + GLOBAL_METRICS.get().cloned() +} diff --git a/codex-rs/otel/src/metrics/timer.rs b/codex-rs/otel/src/metrics/timer.rs new file mode 100644 index 00000000000..b1624fda163 --- /dev/null +++ b/codex-rs/otel/src/metrics/timer.rs @@ -0,0 +1,42 @@ +use crate::metrics::MetricsClient; +use crate::metrics::error::Result; 
+use std::time::Instant; + +pub struct Timer { + name: String, + tags: Vec<(String, String)>, + client: MetricsClient, + start_time: Instant, +} + +impl Drop for Timer { + fn drop(&mut self) { + if let Err(e) = self.record() { + tracing::error!("metrics client error: {}", e); + } + } +} + +impl Timer { + pub(crate) fn new(name: &str, tags: &[(&str, &str)], client: &MetricsClient) -> Self { + Self { + name: name.to_string(), + tags: tags + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(), + client: client.clone(), + start_time: Instant::now(), + } + } + + pub fn record(&self) -> Result<()> { + let tags = self + .tags + .iter() + .map(|(k, v)| (k.as_str(), v.as_str())) + .collect::>(); + self.client + .record_duration(&self.name, self.start_time.elapsed(), &tags) + } +} diff --git a/codex-rs/otel/src/metrics/validation.rs b/codex-rs/otel/src/metrics/validation.rs new file mode 100644 index 00000000000..ce9e436d8f5 --- /dev/null +++ b/codex-rs/otel/src/metrics/validation.rs @@ -0,0 +1,55 @@ +use crate::metrics::error::MetricsError; +use crate::metrics::error::Result; +use std::collections::BTreeMap; + +pub(crate) fn validate_tags(tags: &BTreeMap) -> Result<()> { + for (key, value) in tags { + validate_tag_key(key)?; + validate_tag_value(value)?; + } + Ok(()) +} + +pub(crate) fn validate_metric_name(name: &str) -> Result<()> { + if name.is_empty() { + return Err(MetricsError::EmptyMetricName); + } + if !name.chars().all(is_metric_char) { + return Err(MetricsError::InvalidMetricName { + name: name.to_string(), + }); + } + Ok(()) +} + +pub(crate) fn validate_tag_key(key: &str) -> Result<()> { + validate_tag_component(key, "tag key")?; + Ok(()) +} + +pub(crate) fn validate_tag_value(value: &str) -> Result<()> { + validate_tag_component(value, "tag value") +} + +fn validate_tag_component(value: &str, label: &str) -> Result<()> { + if value.is_empty() { + return Err(MetricsError::EmptyTagComponent { + label: label.to_string(), + }); + } + if 
!value.chars().all(is_tag_char) { + return Err(MetricsError::InvalidTagComponent { + label: label.to_string(), + value: value.to_string(), + }); + } + Ok(()) +} + +fn is_metric_char(c: char) -> bool { + c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') +} + +fn is_tag_char(c: char) -> bool { + c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-' | '/') +} diff --git a/codex-rs/otel/src/otlp.rs b/codex-rs/otel/src/otlp.rs new file mode 100644 index 00000000000..c70e5e55e9e --- /dev/null +++ b/codex-rs/otel/src/otlp.rs @@ -0,0 +1,163 @@ +use crate::config::OtelTlsConfig; +use codex_utils_absolute_path::AbsolutePathBuf; +use http::Uri; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TIMEOUT; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT; +use opentelemetry_otlp::tonic_types::transport::Certificate as TonicCertificate; +use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; +use opentelemetry_otlp::tonic_types::transport::Identity as TonicIdentity; +use reqwest::Certificate as ReqwestCertificate; +use reqwest::Identity as ReqwestIdentity; +use reqwest::header::HeaderMap; +use reqwest::header::HeaderName; +use reqwest::header::HeaderValue; +use std::env; +use std::error::Error; +use std::fs; +use std::io; +use std::io::ErrorKind; +use std::path::PathBuf; +use std::time::Duration; + +pub(crate) fn build_header_map(headers: &std::collections::HashMap) -> HeaderMap { + let mut header_map = HeaderMap::new(); + for (key, value) in headers { + if let Ok(name) = HeaderName::from_bytes(key.as_bytes()) + && let Ok(val) = HeaderValue::from_str(value) + { + header_map.insert(name, val); + } + } + header_map +} + +pub(crate) fn build_grpc_tls_config( + endpoint: &str, + tls_config: ClientTlsConfig, + tls: &OtelTlsConfig, +) -> Result> { + let uri: Uri = endpoint.parse()?; + let host = uri.host().ok_or_else(|| { + config_error(format!( + "OTLP gRPC endpoint {endpoint} does not include a host" + )) + })?; + + let mut config = 
tls_config.domain_name(host.to_owned()); + + if let Some(path) = tls.ca_certificate.as_ref() { + let (pem, _) = read_bytes(path)?; + config = config.ca_certificate(TonicCertificate::from_pem(pem)); + } + + match (&tls.client_certificate, &tls.client_private_key) { + (Some(cert_path), Some(key_path)) => { + let (cert_pem, _) = read_bytes(cert_path)?; + let (key_pem, _) = read_bytes(key_path)?; + config = config.identity(TonicIdentity::from_pem(cert_pem, key_pem)); + } + (Some(_), None) | (None, Some(_)) => { + return Err(config_error( + "client_certificate and client_private_key must both be provided for mTLS", + )); + } + (None, None) => {} + } + + Ok(config) +} + +/// Build a blocking HTTP client with TLS configuration for OTLP HTTP exporters. +/// +/// We use `reqwest::blocking::Client` because OTEL exporters run on dedicated +/// OS threads that are not necessarily backed by tokio. +pub(crate) fn build_http_client( + tls: &OtelTlsConfig, + timeout_var: &str, +) -> Result> { + if tokio::runtime::Handle::try_current().is_ok() { + tokio::task::block_in_place(|| build_http_client_inner(tls, timeout_var)) + } else { + build_http_client_inner(tls, timeout_var) + } +} + +fn build_http_client_inner( + tls: &OtelTlsConfig, + timeout_var: &str, +) -> Result> { + let mut builder = + reqwest::blocking::Client::builder().timeout(resolve_otlp_timeout(timeout_var)); + + if let Some(path) = tls.ca_certificate.as_ref() { + let (pem, location) = read_bytes(path)?; + let certificate = ReqwestCertificate::from_pem(pem.as_slice()).map_err(|error| { + config_error(format!( + "failed to parse certificate {}: {error}", + location.display() + )) + })?; + builder = builder + .tls_built_in_root_certs(false) + .add_root_certificate(certificate); + } + + match (&tls.client_certificate, &tls.client_private_key) { + (Some(cert_path), Some(key_path)) => { + let (mut cert_pem, cert_location) = read_bytes(cert_path)?; + let (key_pem, key_location) = read_bytes(key_path)?; + 
cert_pem.extend_from_slice(key_pem.as_slice()); + let identity = ReqwestIdentity::from_pem(cert_pem.as_slice()).map_err(|error| { + config_error(format!( + "failed to parse client identity using {} and {}: {error}", + cert_location.display(), + key_location.display() + )) + })?; + builder = builder.identity(identity).https_only(true); + } + (Some(_), None) | (None, Some(_)) => { + return Err(config_error( + "client_certificate and client_private_key must both be provided for mTLS", + )); + } + (None, None) => {} + } + + builder + .build() + .map_err(|error| Box::new(error) as Box) +} + +pub(crate) fn resolve_otlp_timeout(signal_var: &str) -> Duration { + if let Some(timeout) = read_timeout_env(signal_var) { + return timeout; + } + if let Some(timeout) = read_timeout_env(OTEL_EXPORTER_OTLP_TIMEOUT) { + return timeout; + } + OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT +} + +fn read_timeout_env(var: &str) -> Option { + let value = env::var(var).ok()?; + let parsed = value.parse::().ok()?; + if parsed < 0 { + return None; + } + Some(Duration::from_millis(parsed as u64)) +} + +fn read_bytes(path: &AbsolutePathBuf) -> Result<(Vec, PathBuf), Box> { + match fs::read(path) { + Ok(bytes) => Ok((bytes, path.to_path_buf())), + Err(error) => Err(Box::new(io::Error::new( + error.kind(), + format!("failed to read {}: {error}", path.display()), + ))), + } +} + +fn config_error(message: impl Into) -> Box { + Box::new(io::Error::new(ErrorKind::InvalidData, message.into())) +} diff --git a/codex-rs/otel/src/traces/mod.rs b/codex-rs/otel/src/traces/mod.rs new file mode 100644 index 00000000000..a58949f9ab3 --- /dev/null +++ b/codex-rs/otel/src/traces/mod.rs @@ -0,0 +1,2 @@ +pub mod otel_manager; +pub mod otel_provider; diff --git a/codex-rs/otel/src/otel_manager.rs b/codex-rs/otel/src/traces/otel_manager.rs similarity index 94% rename from codex-rs/otel/src/otel_manager.rs rename to codex-rs/otel/src/traces/otel_manager.rs index fbdd3227222..f431f1a527f 100644 --- 
a/codex-rs/otel/src/otel_manager.rs +++ b/codex-rs/otel/src/traces/otel_manager.rs @@ -1,9 +1,9 @@ -use crate::otel_provider::traceparent_context_from_env; +use crate::traces::otel_provider::traceparent_context_from_env; use chrono::SecondsFormat; use chrono::Utc; use codex_api::ResponseEvent; use codex_app_server_protocol::AuthMode; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ReasoningEffort; @@ -16,48 +16,24 @@ use eventsource_stream::Event as StreamEvent; use eventsource_stream::EventStreamError as StreamError; use reqwest::Error; use reqwest::Response; -use serde::Serialize; use std::borrow::Cow; use std::fmt::Display; use std::future::Future; use std::time::Duration; use std::time::Instant; -use strum_macros::Display; use tokio::time::error::Elapsed; use tracing::Span; use tracing::trace_span; use tracing_opentelemetry::OpenTelemetrySpanExt; -#[derive(Debug, Clone, Serialize, Display)] -#[serde(rename_all = "snake_case")] -pub enum ToolDecisionSource { - Config, - User, -} - -#[derive(Debug, Clone)] -pub struct OtelEventMetadata { - conversation_id: ConversationId, - auth_mode: Option, - account_id: Option, - account_email: Option, - model: String, - slug: String, - log_user_prompts: bool, - app_version: &'static str, - terminal_type: String, -} - -#[derive(Debug, Clone)] -pub struct OtelManager { - metadata: OtelEventMetadata, - session_span: Span, -} +pub use crate::OtelEventMetadata; +pub use crate::OtelManager; +pub use crate::ToolDecisionSource; impl OtelManager { #[allow(clippy::too_many_arguments)] pub fn new( - conversation_id: ConversationId, + conversation_id: ThreadId, model: &str, slug: &str, account_id: Option, @@ -86,16 +62,11 @@ impl OtelManager { terminal_type, }, session_span, + metrics: crate::metrics::global(), + metrics_use_metadata_tags: true, } } - pub fn with_model(&self, model: &str, 
slug: &str) -> Self { - let mut manager = self.clone(); - manager.metadata.model = model.to_owned(); - manager.metadata.slug = slug.to_owned(); - manager - } - pub fn current_span(&self) -> &Span { &self.session_span } @@ -162,7 +133,7 @@ impl OtelManager { F: FnOnce() -> Fut, Fut: Future>, { - let start = std::time::Instant::now(); + let start = Instant::now(); let response = f().await; let duration = start.elapsed(); diff --git a/codex-rs/otel/src/otel_provider.rs b/codex-rs/otel/src/traces/otel_provider.rs similarity index 63% rename from codex-rs/otel/src/otel_provider.rs rename to codex-rs/otel/src/traces/otel_provider.rs index 8a777e7fdd8..b6a542d4bda 100644 --- a/codex-rs/otel/src/otel_provider.rs +++ b/codex-rs/otel/src/traces/otel_provider.rs @@ -1,9 +1,8 @@ use crate::config::OtelExporter; use crate::config::OtelHttpProtocol; use crate::config::OtelSettings; -use crate::config::OtelTlsConfig; -use codex_utils_absolute_path::AbsolutePathBuf; -use http::Uri; +use crate::metrics::MetricsClient; +use crate::metrics::MetricsConfig; use opentelemetry::Context; use opentelemetry::KeyValue; use opentelemetry::context::ContextGuard; @@ -14,8 +13,6 @@ use opentelemetry::trace::TracerProvider as _; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_otlp::LogExporter; use opentelemetry_otlp::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT; -use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TIMEOUT; -use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT; use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT; use opentelemetry_otlp::Protocol; use opentelemetry_otlp::SpanExporter; @@ -23,9 +20,7 @@ use opentelemetry_otlp::WithExportConfig; use opentelemetry_otlp::WithHttpConfig; use opentelemetry_otlp::WithTonicConfig; use opentelemetry_otlp::tonic_types::metadata::MetadataMap; -use opentelemetry_otlp::tonic_types::transport::Certificate as TonicCertificate; use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; -use 
opentelemetry_otlp::tonic_types::transport::Identity as TonicIdentity; use opentelemetry_sdk::Resource; use opentelemetry_sdk::logs::SdkLoggerProvider; use opentelemetry_sdk::propagation::TraceContextPropagator; @@ -33,21 +28,11 @@ use opentelemetry_sdk::trace::BatchSpanProcessor; use opentelemetry_sdk::trace::SdkTracerProvider; use opentelemetry_sdk::trace::Tracer; use opentelemetry_semantic_conventions as semconv; -use reqwest::Certificate as ReqwestCertificate; -use reqwest::Identity as ReqwestIdentity; -use reqwest::header::HeaderMap; -use reqwest::header::HeaderName; -use reqwest::header::HeaderValue; use std::cell::RefCell; use std::collections::HashMap; use std::env; use std::error::Error; -use std::fs; -use std::io::ErrorKind; -use std::io::{self}; -use std::path::PathBuf; use std::sync::OnceLock; -use std::time::Duration; use tracing::debug; use tracing::level_filters::LevelFilter; use tracing::warn; @@ -63,10 +48,12 @@ thread_local! { static TRACEPARENT_GUARD: RefCell> = const { RefCell::new(None) }; } +// TODO(jif) move OtelProvider out of `traces/` pub struct OtelProvider { pub logger: Option, pub tracer_provider: Option, pub tracer: Option, + pub metrics: Option, } impl OtelProvider { @@ -77,14 +64,33 @@ impl OtelProvider { if let Some(tracer_provider) = &self.tracer_provider { let _ = tracer_provider.shutdown(); } + if let Some(metrics) = &self.metrics { + let _ = metrics.shutdown(); + } } pub fn from(settings: &OtelSettings) -> Result, Box> { let log_enabled = !matches!(settings.exporter, OtelExporter::None); let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None); - if !log_enabled && !trace_enabled { - debug!("No exporter enabled in OTLP settings."); + let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter); + let metrics = if matches!(metric_exporter, OtelExporter::None) { + None + } else { + Some(MetricsClient::new(MetricsConfig::otlp( + settings.environment.clone(), + settings.service_name.clone(), + 
settings.service_version.clone(), + metric_exporter, + ))?) + }; + + if let Some(metrics) = metrics.as_ref() { + crate::metrics::install_global(metrics.clone()); + } + + if !log_enabled && !trace_enabled && metrics.is_none() { + debug!("No OTEL exporter enabled in settings."); return Ok(None); } @@ -113,6 +119,7 @@ impl OtelProvider { logger, tracer_provider, tracer, + metrics, })) } @@ -141,6 +148,10 @@ impl OtelProvider { pub fn codex_export_filter(meta: &tracing::Metadata<'_>) -> bool { meta.target().starts_with("codex_otel") } + + pub fn metrics(&self) -> Option<&MetricsClient> { + self.metrics.as_ref() + } } impl Drop for OtelProvider { @@ -151,6 +162,9 @@ impl Drop for OtelProvider { if let Some(tracer_provider) = &self.tracer_provider { let _ = tracer_provider.shutdown(); } + if let Some(metrics) = &self.metrics { + let _ = metrics.shutdown(); + } } } @@ -223,8 +237,9 @@ fn build_logger( ) -> Result> { let mut builder = SdkLoggerProvider::builder().with_resource(resource.clone()); - match exporter { + match crate::config::resolve_exporter(exporter) { OtelExporter::None => return Ok(builder.build()), + OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), OtelExporter::OtlpGrpc { endpoint, headers, @@ -232,14 +247,14 @@ fn build_logger( } => { debug!("Using OTLP Grpc exporter: {endpoint}"); - let header_map = build_header_map(headers); + let header_map = crate::otlp::build_header_map(&headers); let base_tls_config = ClientTlsConfig::new() .with_enabled_roots() .assume_http2(true); let tls_config = match tls.as_ref() { - Some(tls) => build_grpc_tls_config(endpoint, base_tls_config, tls)?, + Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, None => base_tls_config, }; @@ -269,10 +284,10 @@ fn build_logger( .with_http() .with_endpoint(endpoint) .with_protocol(protocol) - .with_headers(headers.clone()); + .with_headers(headers); if let Some(tls) = tls.as_ref() { - let client = build_http_client(tls, 
OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?; + let client = crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?; exporter_builder = exporter_builder.with_http_client(client); } @@ -289,8 +304,9 @@ fn build_tracer_provider( resource: &Resource, exporter: &OtelExporter, ) -> Result> { - let span_exporter = match exporter { + let span_exporter = match crate::config::resolve_exporter(exporter) { OtelExporter::None => return Ok(SdkTracerProvider::builder().build()), + OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), OtelExporter::OtlpGrpc { endpoint, headers, @@ -298,14 +314,14 @@ fn build_tracer_provider( } => { debug!("Using OTLP Grpc exporter for traces: {endpoint}"); - let header_map = build_header_map(headers); + let header_map = crate::otlp::build_header_map(&headers); let base_tls_config = ClientTlsConfig::new() .with_enabled_roots() .assume_http2(true); let tls_config = match tls.as_ref() { - Some(tls) => build_grpc_tls_config(endpoint, base_tls_config, tls)?, + Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, None => base_tls_config, }; @@ -333,10 +349,11 @@ fn build_tracer_provider( .with_http() .with_endpoint(endpoint) .with_protocol(protocol) - .with_headers(headers.clone()); + .with_headers(headers); if let Some(tls) = tls.as_ref() { - let client = build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?; + let client = + crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?; exporter_builder = exporter_builder.with_http_client(client); } @@ -352,150 +369,6 @@ fn build_tracer_provider( .build()) } -fn build_header_map(headers: &HashMap) -> HeaderMap { - let mut header_map = HeaderMap::new(); - for (key, value) in headers { - if let Ok(name) = HeaderName::from_bytes(key.as_bytes()) - && let Ok(val) = HeaderValue::from_str(value) - { - header_map.insert(name, val); - } - } - header_map -} - -fn build_grpc_tls_config( - endpoint: &str, - tls_config: ClientTlsConfig, - 
tls: &OtelTlsConfig, -) -> Result> { - let uri: Uri = endpoint.parse()?; - let host = uri.host().ok_or_else(|| { - config_error(format!( - "OTLP gRPC endpoint {endpoint} does not include a host" - )) - })?; - - let mut config = tls_config.domain_name(host.to_owned()); - - if let Some(path) = tls.ca_certificate.as_ref() { - let (pem, _) = read_bytes(path)?; - config = config.ca_certificate(TonicCertificate::from_pem(pem)); - } - - match (&tls.client_certificate, &tls.client_private_key) { - (Some(cert_path), Some(key_path)) => { - let (cert_pem, _) = read_bytes(cert_path)?; - let (key_pem, _) = read_bytes(key_path)?; - config = config.identity(TonicIdentity::from_pem(cert_pem, key_pem)); - } - (Some(_), None) | (None, Some(_)) => { - return Err(config_error( - "client_certificate and client_private_key must both be provided for mTLS", - )); - } - (None, None) => {} - } - - Ok(config) -} - -/// Build a blocking HTTP client with TLS configuration for the OTLP HTTP exporter. -/// -/// We use `reqwest::blocking::Client` instead of the async client because the -/// `opentelemetry_sdk` `BatchLogProcessor` spawns a dedicated OS thread that uses -/// `futures_executor::block_on()` rather than tokio. When the async reqwest client's -/// timeout calls `tokio::time::sleep()`, it panics with "no reactor running". -fn build_http_client( - tls: &OtelTlsConfig, - timeout_var: &str, -) -> Result> { - // Wrap in block_in_place because reqwest::blocking::Client creates its own - // internal tokio runtime, which would panic if built directly from an async context. 
- tokio::task::block_in_place(|| build_http_client_inner(tls, timeout_var)) -} - -fn build_http_client_inner( - tls: &OtelTlsConfig, - timeout_var: &str, -) -> Result> { - let mut builder = - reqwest::blocking::Client::builder().timeout(resolve_otlp_timeout(timeout_var)); - - if let Some(path) = tls.ca_certificate.as_ref() { - let (pem, location) = read_bytes(path)?; - let certificate = ReqwestCertificate::from_pem(pem.as_slice()).map_err(|error| { - config_error(format!( - "failed to parse certificate {}: {error}", - location.display() - )) - })?; - // Disable built-in root certificates and use only our custom CA - builder = builder - .tls_built_in_root_certs(false) - .add_root_certificate(certificate); - } - - match (&tls.client_certificate, &tls.client_private_key) { - (Some(cert_path), Some(key_path)) => { - let (mut cert_pem, cert_location) = read_bytes(cert_path)?; - let (key_pem, key_location) = read_bytes(key_path)?; - cert_pem.extend_from_slice(key_pem.as_slice()); - let identity = ReqwestIdentity::from_pem(cert_pem.as_slice()).map_err(|error| { - config_error(format!( - "failed to parse client identity using {} and {}: {error}", - cert_location.display(), - key_location.display() - )) - })?; - builder = builder.identity(identity).https_only(true); - } - (Some(_), None) | (None, Some(_)) => { - return Err(config_error( - "client_certificate and client_private_key must both be provided for mTLS", - )); - } - (None, None) => {} - } - - builder - .build() - .map_err(|error| Box::new(error) as Box) -} - -fn resolve_otlp_timeout(signal_var: &str) -> Duration { - if let Some(timeout) = read_timeout_env(signal_var) { - return timeout; - } - if let Some(timeout) = read_timeout_env(OTEL_EXPORTER_OTLP_TIMEOUT) { - return timeout; - } - OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT -} - -fn read_timeout_env(var: &str) -> Option { - let value = env::var(var).ok()?; - let parsed = value.parse::().ok()?; - if parsed < 0 { - return None; - } - Some(Duration::from_millis(parsed as 
u64)) -} - -fn read_bytes(path: &AbsolutePathBuf) -> Result<(Vec, PathBuf), Box> { - match fs::read(path) { - Ok(bytes) => Ok((bytes, path.to_path_buf())), - Err(error) => Err(Box::new(io::Error::new( - error.kind(), - format!("failed to read {}: {error}", path.display()), - ))), - } -} - -fn config_error(message: impl Into) -> Box { - Box::new(io::Error::new(ErrorKind::InvalidData, message.into())) -} - #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/otel/tests/harness/mod.rs b/codex-rs/otel/tests/harness/mod.rs new file mode 100644 index 00000000000..acdba0b7e11 --- /dev/null +++ b/codex-rs/otel/tests/harness/mod.rs @@ -0,0 +1,81 @@ +use codex_otel::metrics::MetricsClient; +use codex_otel::metrics::MetricsConfig; +use codex_otel::metrics::Result; +use opentelemetry::KeyValue; +use opentelemetry_sdk::metrics::InMemoryMetricExporter; +use opentelemetry_sdk::metrics::data::AggregatedMetrics; +use opentelemetry_sdk::metrics::data::Metric; +use opentelemetry_sdk::metrics::data::MetricData; +use opentelemetry_sdk::metrics::data::ResourceMetrics; +use std::collections::BTreeMap; + +pub(crate) fn build_metrics_with_defaults( + default_tags: &[(&str, &str)], +) -> Result<(MetricsClient, InMemoryMetricExporter)> { + let exporter = InMemoryMetricExporter::default(); + let mut config = MetricsConfig::in_memory( + "test", + "codex-cli", + env!("CARGO_PKG_VERSION"), + exporter.clone(), + ); + for (key, value) in default_tags { + config = config.with_tag(*key, *value)?; + } + let metrics = MetricsClient::new(config)?; + Ok((metrics, exporter)) +} + +pub(crate) fn latest_metrics(exporter: &InMemoryMetricExporter) -> ResourceMetrics { + let Ok(metrics) = exporter.get_finished_metrics() else { + panic!("finished metrics error"); + }; + let Some(metrics) = metrics.into_iter().last() else { + panic!("metrics export missing"); + }; + metrics +} + +pub(crate) fn find_metric<'a>( + resource_metrics: &'a ResourceMetrics, + name: &str, +) -> Option<&'a Metric> { + for 
scope_metrics in resource_metrics.scope_metrics() { + for metric in scope_metrics.metrics() { + if metric.name() == name { + return Some(metric); + } + } + } + None +} + +pub(crate) fn attributes_to_map<'a>( + attributes: impl Iterator, +) -> BTreeMap { + attributes + .map(|kv| (kv.key.as_str().to_string(), kv.value.as_str().to_string())) + .collect() +} + +pub(crate) fn histogram_data( + resource_metrics: &ResourceMetrics, + name: &str, +) -> (Vec, Vec, f64, u64) { + let metric = + find_metric(resource_metrics, name).unwrap_or_else(|| panic!("metric {name} missing")); + match metric.data() { + AggregatedMetrics::F64(data) => match data { + MetricData::Histogram(histogram) => { + let points: Vec<_> = histogram.data_points().collect(); + assert_eq!(points.len(), 1); + let point = points[0]; + let bounds = point.bounds().collect(); + let bucket_counts = point.bucket_counts().collect(); + (bounds, bucket_counts, point.sum(), point.count()) + } + _ => panic!("unexpected histogram aggregation"), + }, + _ => panic!("unexpected metric data type"), + } +} diff --git a/codex-rs/otel/tests/suite/manager_metrics.rs b/codex-rs/otel/tests/suite/manager_metrics.rs new file mode 100644 index 00000000000..1497a5f84c7 --- /dev/null +++ b/codex-rs/otel/tests/suite/manager_metrics.rs @@ -0,0 +1,104 @@ +use crate::harness::attributes_to_map; +use crate::harness::build_metrics_with_defaults; +use crate::harness::find_metric; +use crate::harness::latest_metrics; +use codex_app_server_protocol::AuthMode; +use codex_otel::OtelManager; +use codex_otel::metrics::Result; +use codex_protocol::ThreadId; +use codex_protocol::protocol::SessionSource; +use opentelemetry_sdk::metrics::data::AggregatedMetrics; +use opentelemetry_sdk::metrics::data::MetricData; +use pretty_assertions::assert_eq; +use std::collections::BTreeMap; + +// Ensures OtelManager attaches metadata tags when forwarding metrics. 
+#[test] +fn manager_attaches_metadata_tags_to_metrics() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[("service", "codex-cli")])?; + let manager = OtelManager::new( + ThreadId::new(), + "gpt-5.1", + "gpt-5.1", + Some("account-id".to_string()), + None, + Some(AuthMode::ApiKey), + true, + "tty".to_string(), + SessionSource::Cli, + ) + .with_metrics(metrics); + + manager.counter("codex.session_started", 1, &[("source", "tui")]); + manager.shutdown_metrics()?; + + let resource_metrics = latest_metrics(&exporter); + let metric = + find_metric(&resource_metrics, "codex.session_started").expect("counter metric missing"); + let attrs = match metric.data() { + AggregatedMetrics::U64(data) => match data { + MetricData::Sum(sum) => { + let points: Vec<_> = sum.data_points().collect(); + assert_eq!(points.len(), 1); + attributes_to_map(points[0].attributes()) + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + + let expected = BTreeMap::from([ + ( + "app.version".to_string(), + env!("CARGO_PKG_VERSION").to_string(), + ), + ("auth_mode".to_string(), AuthMode::ApiKey.to_string()), + ("model".to_string(), "gpt-5.1".to_string()), + ("service".to_string(), "codex-cli".to_string()), + ("source".to_string(), "tui".to_string()), + ]); + assert_eq!(attrs, expected); + + Ok(()) +} + +// Ensures metadata tagging can be disabled when recording via OtelManager. 
+#[test] +fn manager_allows_disabling_metadata_tags() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + let manager = OtelManager::new( + ThreadId::new(), + "gpt-4o", + "gpt-4o", + Some("account-id".to_string()), + None, + Some(AuthMode::ApiKey), + true, + "tty".to_string(), + SessionSource::Cli, + ) + .with_metrics_without_metadata_tags(metrics); + + manager.counter("codex.session_started", 1, &[("source", "tui")]); + manager.shutdown_metrics()?; + + let resource_metrics = latest_metrics(&exporter); + let metric = + find_metric(&resource_metrics, "codex.session_started").expect("counter metric missing"); + let attrs = match metric.data() { + AggregatedMetrics::U64(data) => match data { + MetricData::Sum(sum) => { + let points: Vec<_> = sum.data_points().collect(); + assert_eq!(points.len(), 1); + attributes_to_map(points[0].attributes()) + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + + let expected = BTreeMap::from([("source".to_string(), "tui".to_string())]); + assert_eq!(attrs, expected); + + Ok(()) +} diff --git a/codex-rs/otel/tests/suite/mod.rs b/codex-rs/otel/tests/suite/mod.rs new file mode 100644 index 00000000000..c79c7e37c4d --- /dev/null +++ b/codex-rs/otel/tests/suite/mod.rs @@ -0,0 +1,5 @@ +mod manager_metrics; +mod otlp_http_loopback; +mod send; +mod timing; +mod validation; diff --git a/codex-rs/otel/tests/suite/otlp_http_loopback.rs b/codex-rs/otel/tests/suite/otlp_http_loopback.rs new file mode 100644 index 00000000000..599021b3f54 --- /dev/null +++ b/codex-rs/otel/tests/suite/otlp_http_loopback.rs @@ -0,0 +1,192 @@ +use codex_otel::config::OtelExporter; +use codex_otel::config::OtelHttpProtocol; +use codex_otel::metrics::MetricsClient; +use codex_otel::metrics::MetricsConfig; +use codex_otel::metrics::Result; +use std::collections::HashMap; +use std::io::Read as _; +use std::io::Write as _; +use std::net::TcpListener; +use std::net::TcpStream; +use 
std::sync::mpsc; +use std::thread; +use std::time::Duration; +use std::time::Instant; + +struct CapturedRequest { + path: String, + content_type: Option, + body: Vec, +} + +fn read_http_request( + stream: &mut TcpStream, +) -> std::io::Result<(String, HashMap, Vec)> { + stream.set_read_timeout(Some(Duration::from_secs(2)))?; + + let mut buf = Vec::new(); + let mut scratch = [0u8; 8192]; + let header_end = loop { + let n = stream.read(&mut scratch)?; + if n == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "EOF before headers", + )); + } + buf.extend_from_slice(&scratch[..n]); + if let Some(end) = buf.windows(4).position(|w| w == b"\r\n\r\n") { + break end; + } + if buf.len() > 1024 * 1024 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "headers too large", + )); + } + }; + + let headers_bytes = &buf[..header_end]; + let mut body_bytes = buf[header_end + 4..].to_vec(); + + let headers_str = std::str::from_utf8(headers_bytes).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("headers not utf-8: {err}"), + ) + })?; + let mut lines = headers_str.split("\r\n"); + let start = lines.next().ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "missing request line") + })?; + let mut parts = start.split_whitespace(); + let _method = parts.next().unwrap_or_default(); + let path = parts + .next() + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "missing path"))? 
+ .to_string(); + + let mut headers = HashMap::new(); + for line in lines { + let Some((k, v)) = line.split_once(':') else { + continue; + }; + headers.insert(k.trim().to_ascii_lowercase(), v.trim().to_string()); + } + + if let Some(len) = headers + .get("content-length") + .and_then(|v| v.parse::().ok()) + { + while body_bytes.len() < len { + let n = stream.read(&mut scratch)?; + if n == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "EOF before body complete", + )); + } + body_bytes.extend_from_slice(&scratch[..n]); + if body_bytes.len() > len + 1024 * 1024 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "body too large", + )); + } + } + body_bytes.truncate(len); + } + + Ok((path, headers, body_bytes)) +} + +fn write_http_response(stream: &mut TcpStream, status: &str) -> std::io::Result<()> { + let response = format!("HTTP/1.1 {status}\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"); + stream.write_all(response.as_bytes())?; + stream.flush() +} + +#[test] +fn otlp_http_exporter_sends_metrics_to_collector() -> Result<()> { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); + let addr = listener.local_addr().expect("local_addr"); + listener.set_nonblocking(true).expect("set_nonblocking"); + + let (tx, rx) = mpsc::channel::>(); + let server = thread::spawn(move || { + let mut captured = Vec::new(); + let deadline = Instant::now() + Duration::from_secs(3); + + while Instant::now() < deadline { + match listener.accept() { + Ok((mut stream, _)) => { + let result = read_http_request(&mut stream); + let _ = write_http_response(&mut stream, "202 Accepted"); + if let Ok((path, headers, body)) = result { + captured.push(CapturedRequest { + path, + content_type: headers.get("content-type").cloned(), + body, + }); + } + } + Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => { + thread::sleep(Duration::from_millis(10)); + } + Err(_) => break, + } + } + + let _ = tx.send(captured); + }); + + let 
metrics = MetricsClient::new(MetricsConfig::otlp( + "test", + "codex-cli", + env!("CARGO_PKG_VERSION"), + OtelExporter::OtlpHttp { + endpoint: format!("http://{addr}/v1/metrics"), + headers: HashMap::new(), + protocol: OtelHttpProtocol::Json, + tls: None, + }, + ))?; + + metrics.counter("codex.turns", 1, &[("source", "test")])?; + metrics.shutdown()?; + + server.join().expect("server join"); + let captured = rx.recv_timeout(Duration::from_secs(1)).expect("captured"); + + let request = captured + .iter() + .find(|req| req.path == "/v1/metrics") + .unwrap_or_else(|| { + let paths = captured + .iter() + .map(|req| req.path.as_str()) + .collect::>() + .join(", "); + panic!( + "missing /v1/metrics request; got {}: {paths}", + captured.len() + ); + }); + let content_type = request + .content_type + .as_deref() + .unwrap_or(""); + assert!( + content_type.starts_with("application/json"), + "unexpected content-type: {content_type}" + ); + + let body = String::from_utf8_lossy(&request.body); + assert!( + body.contains("codex.turns"), + "expected metric name not found; body prefix: {}", + &body.chars().take(2000).collect::() + ); + + Ok(()) +} diff --git a/codex-rs/otel/tests/suite/send.rs b/codex-rs/otel/tests/suite/send.rs new file mode 100644 index 00000000000..4e7e0279274 --- /dev/null +++ b/codex-rs/otel/tests/suite/send.rs @@ -0,0 +1,205 @@ +use crate::harness::attributes_to_map; +use crate::harness::build_metrics_with_defaults; +use crate::harness::find_metric; +use crate::harness::histogram_data; +use crate::harness::latest_metrics; +use codex_otel::metrics::Result; +use pretty_assertions::assert_eq; +use std::collections::BTreeMap; + +// Ensures counters/histograms render with default + per-call tags. 
+#[test] +fn send_builds_payload_with_tags_and_histograms() -> Result<()> { + let (metrics, exporter) = + build_metrics_with_defaults(&[("service", "codex-cli"), ("env", "prod")])?; + + metrics.counter("codex.turns", 1, &[("model", "gpt-5.1"), ("env", "dev")])?; + metrics.histogram("codex.tool_latency", 25, &[("tool", "shell")])?; + metrics.shutdown()?; + + let resource_metrics = latest_metrics(&exporter); + + let counter = find_metric(&resource_metrics, "codex.turns").expect("counter metric missing"); + let counter_attributes = match counter.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(data) => match data { + opentelemetry_sdk::metrics::data::MetricData::Sum(sum) => { + let points: Vec<_> = sum.data_points().collect(); + assert_eq!(points.len(), 1); + assert_eq!(points[0].value(), 1); + attributes_to_map(points[0].attributes()) + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + + let expected_counter_attributes = BTreeMap::from([ + ("service".to_string(), "codex-cli".to_string()), + ("env".to_string(), "dev".to_string()), + ("model".to_string(), "gpt-5.1".to_string()), + ]); + assert_eq!(counter_attributes, expected_counter_attributes); + + let (bounds, bucket_counts, sum, count) = + histogram_data(&resource_metrics, "codex.tool_latency"); + assert!(!bounds.is_empty()); + assert_eq!(bucket_counts.iter().sum::(), 1); + assert_eq!(sum, 25.0); + assert_eq!(count, 1); + + let histogram_attrs = attributes_to_map( + match find_metric(&resource_metrics, "codex.tool_latency").and_then(|metric| { + match metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::F64( + opentelemetry_sdk::metrics::data::MetricData::Histogram(histogram), + ) => histogram + .data_points() + .next() + .map(opentelemetry_sdk::metrics::data::HistogramDataPoint::attributes), + _ => None, + } + }) { + Some(attrs) => attrs, + None => panic!("histogram attributes missing"), + }, + ); + let 
expected_histogram_attributes = BTreeMap::from([ + ("service".to_string(), "codex-cli".to_string()), + ("env".to_string(), "prod".to_string()), + ("tool".to_string(), "shell".to_string()), + ]); + assert_eq!(histogram_attrs, expected_histogram_attributes); + + Ok(()) +} + +// Ensures defaults merge per line and overrides take precedence. +#[test] +fn send_merges_default_tags_per_line() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[ + ("service", "codex-cli"), + ("env", "prod"), + ("region", "us"), + ])?; + + metrics.counter("codex.alpha", 1, &[("env", "dev"), ("component", "alpha")])?; + metrics.counter( + "codex.beta", + 2, + &[("service", "worker"), ("component", "beta")], + )?; + metrics.shutdown()?; + + let resource_metrics = latest_metrics(&exporter); + let alpha_metric = + find_metric(&resource_metrics, "codex.alpha").expect("codex.alpha metric missing"); + let alpha_point = match alpha_metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(data) => match data { + opentelemetry_sdk::metrics::data::MetricData::Sum(sum) => { + let points: Vec<_> = sum.data_points().collect(); + assert_eq!(points.len(), 1); + points[0] + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + assert_eq!(alpha_point.value(), 1); + let alpha_attrs = attributes_to_map(alpha_point.attributes()); + let expected_alpha_attrs = BTreeMap::from([ + ("component".to_string(), "alpha".to_string()), + ("env".to_string(), "dev".to_string()), + ("region".to_string(), "us".to_string()), + ("service".to_string(), "codex-cli".to_string()), + ]); + assert_eq!(alpha_attrs, expected_alpha_attrs); + + let beta_metric = + find_metric(&resource_metrics, "codex.beta").expect("codex.beta metric missing"); + let beta_point = match beta_metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(data) => match data { + opentelemetry_sdk::metrics::data::MetricData::Sum(sum) => { + let points: 
Vec<_> = sum.data_points().collect(); + assert_eq!(points.len(), 1); + points[0] + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + assert_eq!(beta_point.value(), 2); + let beta_attrs = attributes_to_map(beta_point.attributes()); + let expected_beta_attrs = BTreeMap::from([ + ("component".to_string(), "beta".to_string()), + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "us".to_string()), + ("service".to_string(), "worker".to_string()), + ]); + assert_eq!(beta_attrs, expected_beta_attrs); + + Ok(()) +} + +// Verifies enqueued metrics are delivered by the background worker. +#[test] +fn client_sends_enqueued_metric() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + + metrics.counter("codex.turns", 1, &[("model", "gpt-5.1")])?; + metrics.shutdown()?; + + let resource_metrics = latest_metrics(&exporter); + let counter = find_metric(&resource_metrics, "codex.turns").expect("counter metric missing"); + let points = match counter.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(data) => match data { + opentelemetry_sdk::metrics::data::MetricData::Sum(sum) => { + sum.data_points().collect::>() + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + assert_eq!(points.len(), 1); + let point = points[0]; + assert_eq!(point.value(), 1); + let attrs = attributes_to_map(point.attributes()); + assert_eq!(attrs.get("model").map(String::as_str), Some("gpt-5.1")); + + Ok(()) +} + +// Ensures shutdown flushes successfully with in-memory exporters. 
+#[test] +fn shutdown_flushes_in_memory_exporter() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + + metrics.counter("codex.turns", 1, &[])?; + metrics.shutdown()?; + + let resource_metrics = latest_metrics(&exporter); + let counter = find_metric(&resource_metrics, "codex.turns").expect("counter metric missing"); + let points = match counter.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(data) => match data { + opentelemetry_sdk::metrics::data::MetricData::Sum(sum) => { + sum.data_points().collect::>() + } + _ => panic!("unexpected counter aggregation"), + }, + _ => panic!("unexpected counter data type"), + }; + assert_eq!(points.len(), 1); + + Ok(()) +} + +// Ensures shutting down without recording metrics does not export anything. +#[test] +fn shutdown_without_metrics_exports_nothing() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + + metrics.shutdown()?; + + let finished = exporter.get_finished_metrics().unwrap(); + assert!(finished.is_empty(), "expected no metrics exported"); + Ok(()) +} diff --git a/codex-rs/otel/tests/suite/timing.rs b/codex-rs/otel/tests/suite/timing.rs new file mode 100644 index 00000000000..ce4f2f982e7 --- /dev/null +++ b/codex-rs/otel/tests/suite/timing.rs @@ -0,0 +1,68 @@ +use crate::harness::attributes_to_map; +use crate::harness::build_metrics_with_defaults; +use crate::harness::histogram_data; +use crate::harness::latest_metrics; +use codex_otel::metrics::Result; +use pretty_assertions::assert_eq; +use std::time::Duration; + +// Ensures duration recording maps to histogram output. 
+#[test] +fn record_duration_records_histogram() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + + metrics.record_duration( + "codex.request_latency", + Duration::from_millis(15), + &[("route", "chat")], + )?; + metrics.shutdown()?; + + let (bounds, bucket_counts, sum, count) = + histogram_data(&latest_metrics(&exporter), "codex.request_latency"); + assert!(!bounds.is_empty()); + assert_eq!(bucket_counts.iter().sum::(), 1); + assert_eq!(sum, 15.0); + assert_eq!(count, 1); + + Ok(()) +} + +// Ensures time_result returns the closure output and records timing. +#[test] +fn timer_result_records_success() -> Result<()> { + let (metrics, exporter) = build_metrics_with_defaults(&[])?; + + { + let timer = metrics.start_timer("codex.request_latency", &[("route", "chat")]); + assert!(timer.is_ok()); + } + + metrics.shutdown()?; + + let resource_metrics = latest_metrics(&exporter); + let (bounds, bucket_counts, _sum, count) = + histogram_data(&resource_metrics, "codex.request_latency"); + assert!(!bounds.is_empty()); + assert_eq!(count, 1); + assert_eq!(bucket_counts.iter().sum::(), 1); + let attrs = attributes_to_map( + match crate::harness::find_metric(&resource_metrics, "codex.request_latency").and_then( + |metric| match metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::F64( + opentelemetry_sdk::metrics::data::MetricData::Histogram(histogram), + ) => histogram + .data_points() + .next() + .map(opentelemetry_sdk::metrics::data::HistogramDataPoint::attributes), + _ => None, + }, + ) { + Some(attrs) => attrs, + None => panic!("attributes missing"), + }, + ); + assert_eq!(attrs.get("route").map(String::as_str), Some("chat")); + + Ok(()) +} diff --git a/codex-rs/otel/tests/suite/validation.rs b/codex-rs/otel/tests/suite/validation.rs new file mode 100644 index 00000000000..f88d9fbcd42 --- /dev/null +++ b/codex-rs/otel/tests/suite/validation.rs @@ -0,0 +1,87 @@ +use codex_otel::metrics::MetricsClient; +use 
codex_otel::metrics::MetricsConfig; +use codex_otel::metrics::MetricsError; +use codex_otel::metrics::Result; +use opentelemetry_sdk::metrics::InMemoryMetricExporter; + +fn build_in_memory_client() -> Result { + let exporter = InMemoryMetricExporter::default(); + let config = MetricsConfig::in_memory("test", "codex-cli", env!("CARGO_PKG_VERSION"), exporter); + MetricsClient::new(config) +} + +// Ensures invalid tag components are rejected during config build. +#[test] +fn invalid_tag_component_is_rejected() -> Result<()> { + let err = MetricsConfig::in_memory( + "test", + "codex-cli", + env!("CARGO_PKG_VERSION"), + InMemoryMetricExporter::default(), + ) + .with_tag("bad key", "value") + .unwrap_err(); + assert!(matches!( + err, + MetricsError::InvalidTagComponent { label, value } + if label == "tag key" && value == "bad key" + )); + Ok(()) +} + +// Ensures per-metric tag keys are validated. +#[test] +fn counter_rejects_invalid_tag_key() -> Result<()> { + let metrics = build_in_memory_client()?; + let err = metrics + .counter("codex.turns", 1, &[("bad key", "value")]) + .unwrap_err(); + assert!(matches!( + err, + MetricsError::InvalidTagComponent { label, value } + if label == "tag key" && value == "bad key" + )); + metrics.shutdown()?; + Ok(()) +} + +// Ensures per-metric tag values are validated. +#[test] +fn histogram_rejects_invalid_tag_value() -> Result<()> { + let metrics = build_in_memory_client()?; + let err = metrics + .histogram("codex.request_latency", 3, &[("route", "bad value")]) + .unwrap_err(); + assert!(matches!( + err, + MetricsError::InvalidTagComponent { label, value } + if label == "tag value" && value == "bad value" + )); + metrics.shutdown()?; + Ok(()) +} + +// Ensures invalid metric names are rejected. 
+#[test] +fn counter_rejects_invalid_metric_name() -> Result<()> { + let metrics = build_in_memory_client()?; + let err = metrics.counter("bad name", 1, &[]).unwrap_err(); + assert!(matches!( + err, + MetricsError::InvalidMetricName { name } if name == "bad name" + )); + metrics.shutdown()?; + Ok(()) +} + +#[test] +fn counter_rejects_negative_increment() -> Result<()> { + let metrics = build_in_memory_client()?; + let err = metrics.counter("codex.turns", -1, &[]).unwrap_err(); + assert!(matches!( + err, + MetricsError::NegativeCounterIncrement { name, inc } if name == "codex.turns" && inc == -1 + )); + metrics.shutdown()?; + Ok(()) +} diff --git a/codex-rs/otel/tests/tests.rs b/codex-rs/otel/tests/tests.rs new file mode 100644 index 00000000000..92f88b95fd8 --- /dev/null +++ b/codex-rs/otel/tests/tests.rs @@ -0,0 +1,2 @@ +mod harness; +mod suite; diff --git a/codex-rs/protocol/src/lib.rs b/codex-rs/protocol/src/lib.rs index 0d6a0594fc7..513743c97ff 100644 --- a/codex-rs/protocol/src/lib.rs +++ b/codex-rs/protocol/src/lib.rs @@ -1,6 +1,8 @@ pub mod account; -mod conversation_id; -pub use conversation_id::ConversationId; +mod thread_id; +#[allow(deprecated)] +pub use thread_id::ConversationId; +pub use thread_id::ThreadId; pub mod approvals; pub mod config_types; pub mod custom_prompts; diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index ae426e62965..60c7cc74b00 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -159,30 +159,53 @@ impl TruncationPolicyConfig { #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, TS, JsonSchema)] pub struct ClientVersion(pub i32, pub i32, pub i32); +const fn default_effective_context_window_percent() -> i64 { + 95 +} + /// Model metadata returned by the Codex backend `/models` endpoint. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, TS, JsonSchema)] pub struct ModelInfo { pub slug: String, pub display_name: String, pub description: Option, - pub default_reasoning_level: ReasoningEffort, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub default_reasoning_level: Option, pub supported_reasoning_levels: Vec, pub shell_type: ConfigShellToolType, pub visibility: ModelVisibility, pub supported_in_api: bool, pub priority: i32, pub upgrade: Option, - pub base_instructions: Option, + pub base_instructions: String, pub supports_reasoning_summaries: bool, pub support_verbosity: bool, pub default_verbosity: Option, pub apply_patch_tool_type: Option, pub truncation_policy: TruncationPolicyConfig, pub supports_parallel_tool_calls: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] pub context_window: Option, + /// Token threshold for automatic compaction. When omitted, core derives it + /// from `context_window` (90%). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub auto_compact_token_limit: Option, + /// Percentage of the context window considered usable for inputs, after + /// reserving headroom for system prompts, tool overhead, and model output. + #[serde(default = "default_effective_context_window_percent")] + pub effective_context_window_percent: i64, pub experimental_supported_tools: Vec, } +impl ModelInfo { + pub fn auto_compact_token_limit(&self) -> Option { + self.auto_compact_token_limit.or_else(|| { + self.context_window + .map(|context_window| (context_window * 9) / 10) + }) + } +} + /// Response wrapper for `/models`. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, TS, JsonSchema, Default)] pub struct ModelsResponse { @@ -197,7 +220,9 @@ impl From for ModelPreset { model: info.slug.clone(), display_name: info.display_name, description: info.description.unwrap_or_default(), - default_reasoning_effort: info.default_reasoning_level, + default_reasoning_effort: info + .default_reasoning_level + .unwrap_or(ReasoningEffort::None), supported_reasoning_efforts: info.supported_reasoning_levels.clone(), is_default: false, // default is the highest priority available model upgrade: info.upgrade.as_ref().map(|upgrade_slug| ModelUpgrade { diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 48961086292..a2ac3501602 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -10,7 +10,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; -use crate::ConversationId; +use crate::ThreadId; use crate::approvals::ElicitationRequestEvent; use crate::config_types::ReasoningSummary as ReasoningSummaryConfig; use crate::custom_prompts::CustomPrompt; @@ -74,10 +74,13 @@ pub enum Op { UserInput { /// User input items, see `InputItem` items: Vec, + /// Optional JSON Schema used to constrain the final assistant message for this turn. + #[serde(skip_serializing_if = "Option::is_none")] + final_output_json_schema: Option, }, /// Similar to [`Op::UserInput`], but contains additional context required - /// for a turn of a [`crate::codex_conversation::CodexConversation`]. + /// for a turn of a [`crate::codex_thread::CodexThread`]. UserTurn { /// User input items, see `InputItem` items: Vec, @@ -124,7 +127,7 @@ pub enum Op { #[serde(skip_serializing_if = "Option::is_none")] sandbox_policy: Option, - /// Updated model slug. When set, the model family is derived + /// Updated model slug. When set, the model info is derived /// automatically. 
#[serde(skip_serializing_if = "Option::is_none")] model: Option, @@ -204,9 +207,20 @@ pub enum Op { /// to generate a summary which will be returned as an AgentMessage event. Compact, + /// Set a user-facing session name in the persisted rollout metadata. + /// This is a local-only operation handled by codex-core; it does not + /// involve the model. + SetSessionName { name: String }, + /// Request Codex to undo a turn (turn are stacked so it is the same effect as CMD + Z). Undo, + /// Request Codex to drop the last N user turns from in-memory context. + /// + /// This does not attempt to revert local filesystem changes. Clients are + /// responsible for undoing any edits on disk. + ThreadRollback { num_turns: u32 }, + /// Request a code review from the agent. Review { review_request: ReviewRequest }, @@ -538,6 +552,9 @@ pub enum EventMsg { /// Conversation history was compacted (either automatically or manually). ContextCompacted(ContextCompactedEvent), + /// Conversation history was rolled back by dropping the last N user turns. + ThreadRolledBack(ThreadRolledBackEvent), + /// Agent has started a task TaskStarted(TaskStartedEvent), @@ -669,6 +686,26 @@ pub enum EventMsg { ReasoningRawContentDelta(ReasoningRawContentDeltaEvent), } +/// Agent lifecycle status, derived from emitted events. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS, Default)] +#[serde(rename_all = "snake_case")] +#[ts(rename_all = "snake_case")] +pub enum AgentStatus { + /// Agent is waiting for initialization. + #[default] + PendingInit, + /// Agent is currently running. + Running, + /// Agent is done. Contains the final assistant message. + Completed(Option), + /// Agent encountered an error. + Errored(String), + /// Agent has been shutdowned. + Shutdown, + /// Agent is not found. + NotFound, +} + /// Codex errors that we expose to clients. 
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] @@ -695,6 +732,7 @@ pub enum CodexErrorInfo { ResponseTooManyFailedAttempts { http_status_code: Option, }, + ThreadRollbackFailed, Other, } @@ -705,7 +743,7 @@ pub struct RawResponseItemEvent { #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] pub struct ItemStartedEvent { - pub thread_id: ConversationId, + pub thread_id: ThreadId, pub turn_id: String, pub item: TurnItem, } @@ -723,7 +761,7 @@ impl HasLegacyEvent for ItemStartedEvent { #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] pub struct ItemCompletedEvent { - pub thread_id: ConversationId, + pub thread_id: ThreadId, pub turn_id: String, pub item: TurnItem, } @@ -841,6 +879,7 @@ pub struct TaskCompleteEvent { #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct TaskStartedEvent { + // TODO(aibrahim): make this not optional pub model_context_window: Option, } @@ -862,6 +901,7 @@ pub struct TokenUsage { pub struct TokenUsageInfo { pub total_token_usage: TokenUsage, pub last_token_usage: TokenUsage, + // TODO(aibrahim): make this not optional #[ts(type = "number | null")] pub model_context_window: Option, } @@ -1147,17 +1187,18 @@ pub struct WebSearchEndEvent { pub query: String, } +// Conversation kept for backward compatibility. /// Response payload for `Op::GetHistory` containing the current session's /// in-memory transcript. 
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct ConversationPathResponseEvent { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub path: PathBuf, } #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct ResumedHistory { - pub conversation_id: ConversationId, + pub conversation_id: ThreadId, pub history: Vec, pub rollout_path: PathBuf, } @@ -1252,9 +1293,11 @@ impl fmt::Display for SubAgentSource { #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)] pub struct SessionMeta { - pub id: ConversationId, + pub id: ThreadId, pub timestamp: String, pub cwd: PathBuf, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub name: Option, pub originator: String, pub cli_version: String, pub instructions: Option, @@ -1266,9 +1309,10 @@ pub struct SessionMeta { impl Default for SessionMeta { fn default() -> Self { SessionMeta { - id: ConversationId::default(), + id: ThreadId::default(), timestamp: String::new(), cwd: PathBuf::new(), + name: None, originator: String::new(), cli_version: String::new(), instructions: None, @@ -1595,6 +1639,12 @@ pub struct UndoCompletedEvent { pub message: Option, } +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ThreadRolledBackEvent { + /// Number of user turns that were removed from context. + pub num_turns: u32, +} + #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct StreamErrorEvent { pub message: String, @@ -1772,8 +1822,8 @@ pub struct SkillsListEntry { #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct SessionConfiguredEvent { - /// Name left as session_id instead of conversation_id for backwards compatibility. - pub session_id: ConversationId, + /// Name left as session_id instead of thread_id for backwards compatibility. + pub session_id: ThreadId, /// Tell the client what model is being queried. 
pub model: String, @@ -1901,7 +1951,7 @@ mod tests { #[test] fn item_started_event_from_web_search_emits_begin_event() { let event = ItemStartedEvent { - thread_id: ConversationId::new(), + thread_id: ThreadId::new(), turn_id: "turn-1".into(), item: TurnItem::WebSearch(WebSearchItem { id: "search-1".into(), @@ -1920,7 +1970,7 @@ mod tests { #[test] fn item_started_event_from_non_web_search_emits_no_legacy_events() { let event = ItemStartedEvent { - thread_id: ConversationId::new(), + thread_id: ThreadId::new(), turn_id: "turn-1".into(), item: TurnItem::UserMessage(UserMessageItem::new(&[])), }; @@ -1928,11 +1978,67 @@ mod tests { assert!(event.as_legacy_events(false).is_empty()); } + #[test] + fn user_input_serialization_omits_final_output_json_schema_when_none() -> Result<()> { + let op = Op::UserInput { + items: Vec::new(), + final_output_json_schema: None, + }; + + let json_op = serde_json::to_value(op)?; + assert_eq!(json_op, json!({ "type": "user_input", "items": [] })); + + Ok(()) + } + + #[test] + fn user_input_deserializes_without_final_output_json_schema_field() -> Result<()> { + let op: Op = serde_json::from_value(json!({ "type": "user_input", "items": [] }))?; + + assert_eq!( + op, + Op::UserInput { + items: Vec::new(), + final_output_json_schema: None, + } + ); + + Ok(()) + } + + #[test] + fn user_input_serialization_includes_final_output_json_schema_when_some() -> Result<()> { + let schema = json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + let op = Op::UserInput { + items: Vec::new(), + final_output_json_schema: Some(schema.clone()), + }; + + let json_op = serde_json::to_value(op)?; + assert_eq!( + json_op, + json!({ + "type": "user_input", + "items": [], + "final_output_json_schema": schema, + }) + ); + + Ok(()) + } + /// Serialize Event to verify that its JSON representation has the expected /// amount of nesting. 
#[test] fn serialize_event() -> Result<()> { - let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; + let conversation_id = ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?; let rollout_file = NamedTempFile::new()?; let event = Event { id: "1234".to_string(), diff --git a/codex-rs/protocol/src/conversation_id.rs b/codex-rs/protocol/src/thread_id.rs similarity index 75% rename from codex-rs/protocol/src/conversation_id.rs rename to codex-rs/protocol/src/thread_id.rs index db104d45312..8589566a257 100644 --- a/codex-rs/protocol/src/conversation_id.rs +++ b/codex-rs/protocol/src/thread_id.rs @@ -10,11 +10,11 @@ use uuid::Uuid; #[derive(Debug, Clone, Copy, PartialEq, Eq, TS, Hash)] #[ts(type = "string")] -pub struct ConversationId { +pub struct ThreadId { uuid: Uuid, } -impl ConversationId { +impl ThreadId { pub fn new() -> Self { Self { uuid: Uuid::now_v7(), @@ -28,19 +28,19 @@ impl ConversationId { } } -impl Default for ConversationId { +impl Default for ThreadId { fn default() -> Self { Self::new() } } -impl Display for ConversationId { +impl Display for ThreadId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.uuid) } } -impl Serialize for ConversationId { +impl Serialize for ThreadId { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -49,7 +49,7 @@ impl Serialize for ConversationId { } } -impl<'de> Deserialize<'de> for ConversationId { +impl<'de> Deserialize<'de> for ThreadId { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -60,9 +60,9 @@ impl<'de> Deserialize<'de> for ConversationId { } } -impl JsonSchema for ConversationId { +impl JsonSchema for ThreadId { fn schema_name() -> String { - "ConversationId".to_string() + "ThreadId".to_string() } fn json_schema(generator: &mut SchemaGenerator) -> Schema { @@ -70,12 +70,16 @@ impl JsonSchema for ConversationId { } } +/// Backward-compatible alias for the previous 
name. +#[deprecated(note = "use ThreadId instead")] +pub type ConversationId = ThreadId; + #[cfg(test)] mod tests { use super::*; #[test] - fn test_conversation_id_default_is_not_zeroes() { - let id = ConversationId::default(); + fn test_thread_id_default_is_not_zeroes() { + let id = ThreadId::default(); assert_ne!(id.uuid, Uuid::nil()); } } diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index e0af51e7ebb..32223f18ec3 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -22,7 +22,7 @@ use crate::tui::TuiEvent; use crate::update_action::UpdateAction; use codex_ansi_escape::ansi_escape_line; use codex_core::AuthManager; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_core::config::edit::ConfigEdit; use codex_core::config::edit::ConfigEditsBuilder; @@ -38,7 +38,7 @@ use codex_core::protocol::Op; use codex_core::protocol::SessionSource; use codex_core::protocol::SkillErrorInfo; use codex_core::protocol::TokenUsage; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; @@ -70,21 +70,17 @@ const EXTERNAL_EDITOR_HINT: &str = "Save and close external editor to continue." 
#[derive(Debug, Clone)] pub struct AppExitInfo { pub token_usage: TokenUsage, - pub conversation_id: Option, + pub thread_id: Option, pub update_action: Option, } -fn session_summary( - token_usage: TokenUsage, - conversation_id: Option, -) -> Option { +fn session_summary(token_usage: TokenUsage, thread_id: Option) -> Option { if token_usage.is_zero() { return None; } let usage_line = FinalOutput::from(token_usage).to_string(); - let resume_command = - conversation_id.map(|conversation_id| format!("codex resume {conversation_id}")); + let resume_command = thread_id.map(|thread_id| format!("codex resume {thread_id}")); Some(SessionSummary { usage_line, resume_command, @@ -275,7 +271,7 @@ async fn handle_model_migration_prompt_if_needed( ModelMigrationOutcome::Exit => { return Some(AppExitInfo { token_usage: TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: None, }); } @@ -286,7 +282,7 @@ async fn handle_model_migration_prompt_if_needed( } pub(crate) struct App { - pub(crate) server: Arc, + pub(crate) server: Arc, pub(crate) app_event_tx: AppEventSender, pub(crate) chat_widget: ChatWidget, pub(crate) auth_manager: Arc, @@ -316,7 +312,7 @@ pub(crate) struct App { pub(crate) pending_update_action: Option, /// Ignore the next ShutdownComplete event when we're intentionally - /// stopping a conversation (e.g., before starting a new one). + /// stopping a thread (e.g., before starting a new one). suppress_shutdown_complete: bool, // One-shot suppression of the next world-writable scan after user confirmation. 
@@ -324,11 +320,11 @@ pub(crate) struct App { } impl App { - async fn shutdown_current_conversation(&mut self) { - if let Some(conversation_id) = self.chat_widget.conversation_id() { + async fn shutdown_current_thread(&mut self) { + if let Some(thread_id) = self.chat_widget.thread_id() { self.suppress_shutdown_complete = true; self.chat_widget.submit_op(Op::Shutdown); - self.server.remove_conversation(&conversation_id).await; + self.server.remove_thread(&thread_id).await; } } @@ -348,11 +344,12 @@ impl App { let (app_event_tx, mut app_event_rx) = unbounded_channel(); let app_event_tx = AppEventSender::new(app_event_tx); - let conversation_manager = Arc::new(ConversationManager::new( + let thread_manager = Arc::new(ThreadManager::new( + config.codex_home.clone(), auth_manager.clone(), SessionSource::Cli, )); - let mut model = conversation_manager + let mut model = thread_manager .get_models_manager() .get_model(&config.model, &config) .await; @@ -361,7 +358,7 @@ impl App { &mut config, model.as_str(), &app_event_tx, - conversation_manager.get_models_manager(), + thread_manager.get_models_manager(), ) .await; if let Some(exit_info) = exit_info { @@ -382,20 +379,16 @@ impl App { initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), - models_manager: conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: feedback.clone(), is_first_run, model: model.clone(), }; - ChatWidget::new(init, conversation_manager.clone()) + ChatWidget::new(init, thread_manager.clone()) } ResumeSelection::Resume(path) => { - let resumed = conversation_manager - .resume_conversation_from_rollout( - config.clone(), - path.clone(), - auth_manager.clone(), - ) + let resumed = thread_manager + .resume_thread_from_rollout(config.clone(), path.clone(), auth_manager.clone()) .await .wrap_err_with(|| { format!("Failed to resume session from {}", path.display()) @@ -408,16 +401,12 @@ impl App { 
initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), - models_manager: conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: feedback.clone(), is_first_run, model: model.clone(), }; - ChatWidget::new_from_existing( - init, - resumed.conversation, - resumed.session_configured, - ) + ChatWidget::new_from_existing(init, resumed.thread, resumed.session_configured) } }; @@ -428,7 +417,7 @@ impl App { let upgrade_version = crate::updates::get_upgrade_version(&config); let mut app = Self { - server: conversation_manager.clone(), + server: thread_manager.clone(), app_event_tx, chat_widget, auth_manager: auth_manager.clone(), @@ -501,7 +490,7 @@ impl App { tui.terminal.clear()?; Ok(AppExitInfo { token_usage: app.token_usage(), - conversation_id: app.chat_widget.conversation_id(), + thread_id: app.chat_widget.thread_id(), update_action: app.pending_update_action, }) } @@ -555,18 +544,16 @@ impl App { } async fn handle_event(&mut self, tui: &mut tui::Tui, event: AppEvent) -> Result { - let model_family = self + let model_info = self .server .get_models_manager() - .construct_model_family(self.current_model.as_str(), &self.config) + .construct_model_info(self.current_model.as_str(), &self.config) .await; match event { AppEvent::NewSession => { - let summary = session_summary( - self.chat_widget.token_usage(), - self.chat_widget.conversation_id(), - ); - self.shutdown_current_conversation().await; + let summary = + session_summary(self.chat_widget.token_usage(), self.chat_widget.thread_id()); + self.shutdown_current_thread().await; let init = crate::chatwidget::ChatWidgetInit { config: self.config.clone(), frame_requester: tui.frame_requester(), @@ -581,7 +568,7 @@ impl App { model: self.current_model.clone(), }; self.chat_widget = ChatWidget::new(init, self.server.clone()); - self.current_model = model_family.get_model_slug().to_string(); + self.current_model = 
model_info.slug.clone(); if let Some(summary) = summary { let mut lines: Vec> = vec![summary.usage_line.clone().into()]; if let Some(command) = summary.resume_command { @@ -604,11 +591,11 @@ impl App { ResumeSelection::Resume(path) => { let summary = session_summary( self.chat_widget.token_usage(), - self.chat_widget.conversation_id(), + self.chat_widget.thread_id(), ); match self .server - .resume_conversation_from_rollout( + .resume_thread_from_rollout( self.config.clone(), path.clone(), self.auth_manager.clone(), @@ -616,7 +603,7 @@ impl App { .await { Ok(resumed) => { - self.shutdown_current_conversation().await; + self.shutdown_current_thread().await; let init = crate::chatwidget::ChatWidgetInit { config: self.config.clone(), frame_requester: tui.frame_requester(), @@ -632,10 +619,10 @@ impl App { }; self.chat_widget = ChatWidget::new_from_existing( init, - resumed.conversation, + resumed.thread, resumed.session_configured, ); - self.current_model = model_family.get_model_slug().to_string(); + self.current_model = model_info.slug.clone(); if let Some(summary) = summary { let mut lines: Vec> = vec![summary.usage_line.clone().into()]; @@ -1333,23 +1320,26 @@ mod tests { use crate::history_cell::new_session_info; use codex_core::AuthManager; use codex_core::CodexAuth; - use codex_core::ConversationManager; + use codex_core::ThreadManager; + use codex_core::config::ConfigBuilder; use codex_core::protocol::AskForApproval; use codex_core::protocol::Event; use codex_core::protocol::EventMsg; use codex_core::protocol::SandboxPolicy; use codex_core::protocol::SessionConfiguredEvent; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; + use insta::assert_snapshot; use ratatui::prelude::Line; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; + use tempfile::tempdir; async fn make_test_app() -> App { let (chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender().await; let config = 
chat_widget.config_ref().clone(); let current_model = "gpt-5.2-codex".to_string(); - let server = Arc::new(ConversationManager::with_models_provider( + let server = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), )); @@ -1388,7 +1378,7 @@ mod tests { let (chat_widget, app_event_tx, rx, op_rx) = make_chatwidget_manual_with_sender().await; let config = chat_widget.config_ref().clone(); let current_model = "gpt-5.2-codex".to_string(); - let server = Arc::new(ConversationManager::with_models_provider( + let server = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), )); @@ -1427,6 +1417,24 @@ mod tests { codex_core::models_manager::model_presets::all_model_presets().clone() } + fn model_migration_copy_to_plain_text( + copy: &crate::model_migration::ModelMigrationCopy, + ) -> String { + let mut s = String::new(); + for span in ©.heading { + s.push_str(&span.content); + } + s.push('\n'); + s.push('\n'); + for line in ©.content { + for span in &line.spans { + s.push_str(&span.content); + } + s.push('\n'); + } + s + } + #[tokio::test] async fn model_migration_prompt_only_shows_for_deprecated_models() { let seen = BTreeMap::new(); @@ -1508,6 +1516,59 @@ mod tests { assert!(target_preset_for_upgrade(&available, "missing-target").is_none()); } + #[tokio::test] + async fn model_migration_prompt_shows_for_hidden_model() { + let codex_home = tempdir().expect("temp codex home"); + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("config"); + + let available_models = all_model_presets(); + let current = available_models + .iter() + .find(|preset| preset.model == "gpt-5.1-codex") + .cloned() + .expect("gpt-5.1-codex preset present"); + assert!( + !current.show_in_picker, + "expected gpt-5.1-codex to be hidden from picker for this test" + ); + + let upgrade = 
current.upgrade.as_ref().expect("upgrade configured"); + assert!( + should_show_model_migration_prompt( + ¤t.model, + &upgrade.id, + &config.notices.model_migrations, + &available_models, + ), + "expected migration prompt to be eligible for hidden model" + ); + + let target = target_preset_for_upgrade(&available_models, &upgrade.id) + .expect("upgrade target present"); + let target_description = + (!target.description.is_empty()).then(|| target.description.clone()); + let can_opt_out = true; + let copy = migration_copy_for_models( + ¤t.model, + &upgrade.id, + upgrade.model_link.clone(), + upgrade.upgrade_copy.clone(), + target.display_name.clone(), + target_description, + can_opt_out, + ); + + // Snapshot the copy we would show; rendering is covered by model_migration snapshots. + assert_snapshot!( + "model_migration_prompt_shows_for_hidden_model", + model_migration_copy_to_plain_text(©) + ); + } + #[tokio::test] async fn update_reasoning_effort_updates_config() { let mut app = make_test_app().await; @@ -1545,7 +1606,7 @@ mod tests { let make_header = |is_first| { let event = SessionConfiguredEvent { - session_id: ConversationId::new(), + session_id: ThreadId::new(), model: "gpt-test".to_string(), model_provider_id: "test-provider".to_string(), approval_policy: AskForApproval::Never, @@ -1567,7 +1628,7 @@ mod tests { // Simulate the transcript after trimming for a fork, replaying history, and // appending the edited turn. The session header separates the retained history - // from the forked conversation's replayed turns. + // from the forked thread's replayed turns. 
app.transcript_cells = vec![ make_header(true), user_cell("first question"), @@ -1583,7 +1644,7 @@ mod tests { assert_eq!(user_count(&app.transcript_cells), 2); - app.backtrack.base_id = Some(ConversationId::new()); + app.backtrack.base_id = Some(ThreadId::new()); app.backtrack.primed = true; app.backtrack.nth_user_message = user_count(&app.transcript_cells).saturating_sub(1); @@ -1598,9 +1659,9 @@ mod tests { async fn new_session_requests_shutdown_for_previous_conversation() { let (mut app, mut app_event_rx, mut op_rx) = make_test_app_with_channels().await; - let conversation_id = ConversationId::new(); + let thread_id = ThreadId::new(); let event = SessionConfiguredEvent { - session_id: conversation_id, + session_id: thread_id, model: "gpt-test".to_string(), model_provider_id: "test-provider".to_string(), approval_policy: AskForApproval::Never, @@ -1621,7 +1682,7 @@ mod tests { while app_event_rx.try_recv().is_ok() {} while op_rx.try_recv().is_ok() {} - app.shutdown_current_conversation().await; + app.shutdown_current_thread().await; match op_rx.try_recv() { Ok(Op::Shutdown) => {} @@ -1643,8 +1704,7 @@ mod tests { total_tokens: 12, ..Default::default() }; - let conversation = - ConversationId::from_string("123e4567-e89b-12d3-a456-426614174000").unwrap(); + let conversation = ThreadId::from_string("123e4567-e89b-12d3-a456-426614174000").unwrap(); let summary = session_summary(usage, Some(conversation)).expect("summary"); assert_eq!( diff --git a/codex-rs/tui/src/app_backtrack.rs b/codex-rs/tui/src/app_backtrack.rs index ce5dff2ed85..c28680dd930 100644 --- a/codex-rs/tui/src/app_backtrack.rs +++ b/codex-rs/tui/src/app_backtrack.rs @@ -9,7 +9,7 @@ use crate::pager_overlay::Overlay; use crate::tui; use crate::tui::TuiEvent; use codex_core::protocol::ConversationPathResponseEvent; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use color_eyre::eyre::Result; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; @@ -20,14 +20,14 @@ use 
crossterm::event::KeyEventKind; pub(crate) struct BacktrackState { /// True when Esc has primed backtrack mode in the main view. pub(crate) primed: bool, - /// Session id of the base conversation to fork from. - pub(crate) base_id: Option, + /// Session id of the base thread to fork from. + pub(crate) base_id: Option, /// Index in the transcript of the last user message. pub(crate) nth_user_message: usize, /// True when the transcript overlay is showing a backtrack preview. pub(crate) overlay_preview_active: bool, /// Pending fork request: (base_id, nth_user_message, prefill). - pub(crate) pending: Option<(ConversationId, usize, String)>, + pub(crate) pending: Option<(ThreadId, usize, String)>, } impl App { @@ -95,11 +95,11 @@ impl App { } } - /// Stage a backtrack and request conversation history from the agent. + /// Stage a backtrack and request thread history from the agent. pub(crate) fn request_backtrack( &mut self, prefill: String, - base_id: ConversationId, + base_id: ThreadId, nth_user_message: usize, ) { self.backtrack.pending = Some((base_id, nth_user_message, prefill)); @@ -153,7 +153,7 @@ impl App { fn prime_backtrack(&mut self) { self.backtrack.primed = true; self.backtrack.nth_user_message = usize::MAX; - self.backtrack.base_id = self.chat_widget.conversation_id(); + self.backtrack.base_id = self.chat_widget.thread_id(); self.chat_widget.show_esc_backtrack_hint(); } @@ -169,7 +169,7 @@ impl App { /// When overlay is already open, begin preview mode and select latest user message. fn begin_overlay_backtrack_preview(&mut self, tui: &mut tui::Tui) { self.backtrack.primed = true; - self.backtrack.base_id = self.chat_widget.conversation_id(); + self.backtrack.base_id = self.chat_widget.thread_id(); self.backtrack.overlay_preview_active = true; let count = user_count(&self.transcript_cells); if let Some(last) = count.checked_sub(1) { @@ -315,28 +315,26 @@ impl App { } } - /// Thin wrapper around ConversationManager::fork_conversation. 
+ /// Thin wrapper around ThreadManager::fork_thread. async fn perform_fork( &self, path: PathBuf, nth_user_message: usize, cfg: codex_core::config::Config, - ) -> codex_core::error::Result { - self.server - .fork_conversation(nth_user_message, cfg, path) - .await + ) -> codex_core::error::Result { + self.server.fork_thread(nth_user_message, cfg, path).await } - /// Install a forked conversation into the ChatWidget and update UI to reflect selection. + /// Install a forked thread into the ChatWidget and update UI to reflect selection. fn install_forked_conversation( &mut self, tui: &mut tui::Tui, cfg: codex_core::config::Config, - new_conv: codex_core::NewConversation, + new_conv: codex_core::NewThread, nth_user_message: usize, prefill: &str, ) { - let conv = new_conv.conversation; + let thread = new_conv.thread; let session_configured = new_conv.session_configured; let init = crate::chatwidget::ChatWidgetInit { config: cfg, @@ -352,7 +350,7 @@ impl App { is_first_run: false, }; self.chat_widget = - crate::chatwidget::ChatWidget::new_from_existing(init, conv, session_configured); + crate::chatwidget::ChatWidget::new_from_existing(init, thread, session_configured); // Trim transcript up to the selected user message and re-render it. 
self.trim_transcript_for_backtrack(nth_user_message); self.render_transcript_once(tui); diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index d42861eb1d5..0f0445fee83 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -461,9 +461,13 @@ fn exec_options( .chain( proposed_execpolicy_amendment .filter(|_| features.enabled(Feature::ExecPolicy)) - .map(|prefix| { + .and_then(|prefix| { let rendered_prefix = strip_bash_lc_and_escape(prefix.command()); - ApprovalOption { + if rendered_prefix.contains('\n') || rendered_prefix.contains('\r') { + return None; + } + + Some(ApprovalOption { label: format!( "Yes, and don't ask again for commands that start with `{rendered_prefix}`" ), @@ -474,7 +478,7 @@ fn exec_options( ), display_shortcut: None, additional_shortcuts: vec![key_hint::plain(KeyCode::Char('p'))], - } + }) }), ) .chain([ApprovalOption { @@ -494,6 +498,12 @@ fn patch_options() -> Vec { display_shortcut: None, additional_shortcuts: vec![key_hint::plain(KeyCode::Char('y'))], }, + ApprovalOption { + label: "Yes, and don't ask again for these files".to_string(), + decision: ApprovalDecision::Review(ReviewDecision::ApprovedForSession), + display_shortcut: None, + additional_shortcuts: vec![key_hint::plain(KeyCode::Char('a'))], + }, ApprovalOption { label: "No, and tell Codex what to do differently".to_string(), decision: ApprovalDecision::Review(ReviewDecision::Abort), diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index d6d01045b4b..b7edcbaf309 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -73,6 +73,7 @@ const LARGE_PASTE_CHAR_THRESHOLD: usize = 1000; pub enum InputResult { Submitted(String), Command(SlashCommand), + CommandWithArgs(SlashCommand, String), None, } @@ -109,6 +110,9 @@ pub(crate) struct ChatComposer 
{ attached_images: Vec, placeholder_text: String, is_task_running: bool, + /// When false, the composer is temporarily read-only (e.g. during sandbox setup). + input_enabled: bool, + input_disabled_placeholder: Option, // Non-bracketed paste burst tracker. paste_burst: PasteBurst, // When true, disables paste-burst logic and inserts characters immediately. @@ -159,6 +163,8 @@ impl ChatComposer { attached_images: Vec::new(), placeholder_text, is_task_running: false, + input_enabled: true, + input_disabled_placeholder: None, paste_burst: PasteBurst::default(), disable_paste_burst: false, custom_prompts: Vec::new(), @@ -487,6 +493,10 @@ impl ChatComposer { /// Handle a key event coming from the main UI. pub fn handle_key_event(&mut self, key_event: KeyEvent) -> (InputResult, bool) { + if !self.input_enabled { + return (InputResult::None, false); + } + let result = match &mut self.active_popup { ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event), ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event), @@ -683,6 +693,42 @@ impl ChatComposer { if self.paste_burst.try_append_char_if_active(ch, now) { return (InputResult::None, true); } + // Non-ASCII input often comes from IMEs and can arrive in quick bursts. + // We do not want to hold the first char (flicker suppression) on this path, but we + // still want to detect paste-like bursts. Before applying any non-ASCII input, flush + // any existing burst buffer (including a pending first char from the ASCII path) so + // we don't carry that transient state forward. 
+ if let Some(pasted) = self.paste_burst.flush_before_modified_input() { + self.handle_paste(pasted); + } + if let Some(decision) = self.paste_burst.on_plain_char_no_hold(now) { + match decision { + CharDecision::BufferAppend => { + self.paste_burst.append_char_to_buffer(ch, now); + return (InputResult::None, true); + } + CharDecision::BeginBuffer { retro_chars } => { + let cur = self.textarea.cursor(); + let txt = self.textarea.text(); + let safe_cur = Self::clamp_to_char_boundary(txt, cur); + let before = &txt[..safe_cur]; + // If decision is to buffer, seed the paste burst buffer with the grabbed chars + new. + // Otherwise, fall through to normal insertion below. + if let Some(grab) = + self.paste_burst + .decide_begin_buffer(now, before, retro_chars as usize) + { + if !grab.grabbed.is_empty() { + self.textarea.replace_range(grab.start_byte..safe_cur, ""); + } + // seed the paste burst buffer with everything (grabbed + new) + self.paste_burst.append_char_to_buffer(ch, now); + return (InputResult::None, true); + } + } + _ => unreachable!("on_plain_char_no_hold returned unexpected variant"), + } + } } if let Some(pasted) = self.paste_burst.flush_before_modified_input() { self.handle_paste(pasted); @@ -1274,6 +1320,18 @@ impl ChatComposer { } } + if !input_starts_with_space + && let Some((name, rest)) = parse_slash_name(&text) + && !rest.is_empty() + && !name.contains('/') + && let Some((_n, cmd)) = built_in_slash_commands() + .into_iter() + .find(|(command_name, _)| *command_name == name) + && matches!(cmd, SlashCommand::Review | SlashCommand::Rename) + { + return (InputResult::CommandWithArgs(cmd, rest.to_string()), true); + } + let expanded_prompt = match expand_custom_prompt(&text, &self.custom_prompts) { Ok(expanded) => expanded, Err(err) => { @@ -1346,9 +1404,8 @@ impl ChatComposer { { let has_ctrl_or_alt = has_ctrl_or_alt(modifiers); if !has_ctrl_or_alt { - // Non-ASCII characters (e.g., from IMEs) can arrive in quick bursts and be - // misclassified by 
paste heuristics. Flush any active burst buffer and insert - // non-ASCII characters directly. + // Non-ASCII characters (e.g., from IMEs) can arrive in quick bursts, so avoid + // holding the first char while still allowing burst detection for paste input. if !ch.is_ascii() { return self.handle_non_ascii_char(input); } @@ -1370,7 +1427,6 @@ impl ChatComposer { if !grab.grabbed.is_empty() { self.textarea.replace_range(grab.start_byte..safe_cur, ""); } - self.paste_burst.begin_with_retro_grabbed(grab.grabbed, now); self.paste_burst.append_char_to_buffer(ch, now); return (InputResult::None, true); } @@ -1650,6 +1706,16 @@ impl ChatComposer { fn sync_popups(&mut self) { let file_token = Self::current_at_token(&self.textarea); + let browsing_history = self + .history + .should_handle_navigation(self.textarea.text(), self.textarea.cursor()); + // When browsing input history (shell-style Up/Down recall), skip all popup + // synchronization so nothing steals focus from continued history navigation. + if browsing_history { + self.active_popup = ActivePopup::None; + return; + } + let skill_token = self.current_skill_token(); let allow_command_popup = file_token.is_none() && skill_token.is_none(); @@ -1854,6 +1920,17 @@ impl ChatComposer { self.has_focus = has_focus; } + #[allow(dead_code)] + pub(crate) fn set_input_enabled(&mut self, enabled: bool, placeholder: Option) { + self.input_enabled = enabled; + self.input_disabled_placeholder = if enabled { None } else { placeholder }; + + // Avoid leaving interactive popups open while input is blocked. 
+ if !enabled && !matches!(self.active_popup, ActivePopup::None) { + self.active_popup = ActivePopup::None; + } + } + pub fn set_task_running(&mut self, running: bool) { self.is_task_running = running; } @@ -1879,6 +1956,10 @@ impl ChatComposer { impl Renderable for ChatComposer { fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> { + if !self.input_enabled { + return None; + } + let [_, textarea_rect, _] = self.layout_areas(area); let state = *self.textarea_state.borrow(); self.textarea.cursor_pos_with_state(textarea_rect, state) @@ -1957,10 +2038,15 @@ impl Renderable for ChatComposer { let style = user_message_style(); Block::default().style(style).render_ref(composer_rect, buf); if !textarea_rect.is_empty() { + let prompt = if self.input_enabled { + "›".bold() + } else { + "›".dim() + }; buf.set_span( textarea_rect.x - LIVE_PREFIX_COLS, textarea_rect.y, - &"›".bold(), + &prompt, textarea_rect.width, ); } @@ -1968,7 +2054,15 @@ impl Renderable for ChatComposer { let mut state = self.textarea_state.borrow_mut(); StatefulWidgetRef::render_ref(&(&self.textarea), textarea_rect, buf, &mut state); if self.textarea.text().is_empty() { - let placeholder = Span::from(self.placeholder_text.as_str()).dim(); + let text = if self.input_enabled { + self.placeholder_text.as_str().to_string() + } else { + self.input_disabled_placeholder + .as_deref() + .unwrap_or("Input disabled.") + .to_string() + }; + let placeholder = Span::from(text).dim().italic(); Line::from(vec![placeholder]).render_ref(textarea_rect.inner(Margin::new(0, 0)), buf); } } @@ -2261,8 +2355,7 @@ mod tests { composer.handle_key_event(KeyEvent::new(KeyCode::Char('?'), KeyModifiers::NONE)); assert_eq!(result, InputResult::None); assert!(needs_redraw, "typing should still mark the view dirty"); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let _ = composer.flush_paste_burst_if_due(); + let _ = flush_after_paste_burst(&mut composer); assert_eq!(composer.textarea.text(), "h?"); 
assert_eq!(composer.footer_mode, FooterMode::ShortcutSummary); assert_eq!(composer.footer_mode(), FooterMode::ContextOnly); @@ -2284,14 +2377,18 @@ mod tests { false, ); + // Force an active paste burst so this test doesn't depend on tight timing. + composer + .paste_burst + .begin_with_retro_grabbed(String::new(), Instant::now()); + for ch in ['h', 'i', '?', 't', 'h', 'e', 'r', 'e'] { let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE)); } assert!(composer.is_in_paste_burst()); assert_eq!(composer.textarea.text(), ""); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let _ = composer.flush_paste_burst_if_due(); + let _ = flush_after_paste_burst(&mut composer); assert_eq!(composer.textarea.text(), "hi?there"); assert_ne!(composer.footer_mode, FooterMode::ShortcutOverlay); @@ -2500,6 +2597,116 @@ mod tests { } } + #[test] + fn non_ascii_char_inserts_immediately_without_burst_state() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('あ'), KeyModifiers::NONE)); + + assert_eq!(composer.textarea.text(), "あ"); + assert!(!composer.is_in_paste_burst()); + } + + // test a variety of non-ascii char sequences to ensure we are handling them correctly + #[test] + fn non_ascii_burst_handles_newline() { + let test_cases = [ + // triggers on windows + "天地玄黄 宇宙洪荒 +日月盈昃 辰宿列张 +寒来暑往 秋收冬藏 + +你好世界 编码测试 +汉字处理 UTF-8 +终端显示 正确无误 + +风吹竹林 月照大江 +白云千载 青山依旧 +程序员 与 Unicode 同行", + // Simulate pasting "你 好\nhi" with an ideographic space to trigger pastey heuristics. 
+ "你 好\nhi", + ]; + + for test_case in test_cases { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + for c in test_case.chars() { + let _ = + composer.handle_key_event(KeyEvent::new(KeyCode::Char(c), KeyModifiers::NONE)); + } + + assert!( + composer.textarea.text().is_empty(), + "non-empty textarea before flush: {test_case}", + ); + let _ = flush_after_paste_burst(&mut composer); + assert_eq!(composer.textarea.text(), test_case); + } + } + + #[test] + fn ascii_burst_treats_enter_as_newline() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + // Force an active burst so this test doesn't depend on tight timing. 
+ composer + .paste_burst + .begin_with_retro_grabbed(String::new(), Instant::now()); + + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('h'), KeyModifiers::NONE)); + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)); + + let (result, _) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + assert!( + matches!(result, InputResult::None), + "Enter during a burst should insert newline, not submit" + ); + + for ch in ['t', 'h', 'e', 'r', 'e'] { + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE)); + } + + let _ = flush_after_paste_burst(&mut composer); + assert_eq!(composer.textarea.text(), "hi\nthere"); + } + #[test] fn handle_paste_small_inserts_text() { use crossterm::event::KeyCode; @@ -2790,6 +2997,11 @@ mod tests { } } + fn flush_after_paste_burst(composer: &mut ChatComposer) -> bool { + std::thread::sleep(PasteBurst::recommended_active_flush_delay()); + composer.flush_paste_burst_if_due() + } + // Test helper: simulate human typing with a brief delay and flush the paste-burst buffer fn type_chars_humanlike(composer: &mut ChatComposer, chars: &[char]) { use crossterm::event::KeyCode; @@ -2831,6 +3043,9 @@ mod tests { InputResult::Command(cmd) => { assert_eq!(cmd.command(), "init"); } + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/init'") + } InputResult::Submitted(text) => { panic!("expected command dispatch, but composer submitted literal text: {text}") } @@ -2839,6 +3054,44 @@ mod tests { assert!(composer.textarea.is_empty(), "composer should be cleared"); } + #[test] + fn slash_review_with_args_dispatches_command_with_args() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex 
to do anything".to_string(), + false, + ); + + type_chars_humanlike(&mut composer, &['/', 'r', 'e', 'v', 'i', 'e', 'w', ' ']); + type_chars_humanlike(&mut composer, &['f', 'i', 'x', ' ', 't', 'h', 'i', 's']); + + let (result, _needs_redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + + match result { + InputResult::CommandWithArgs(cmd, args) => { + assert_eq!(cmd, SlashCommand::Review); + assert_eq!(args, "fix this"); + } + InputResult::Command(cmd) => { + panic!("expected args for '/review', got bare command: {cmd:?}") + } + InputResult::Submitted(text) => { + panic!("expected command dispatch, got literal submit: {text}") + } + InputResult::None => panic!("expected CommandWithArgs result for '/review'"), + } + assert!(composer.textarea.is_empty(), "composer should be cleared"); + } + #[test] fn extract_args_supports_quoted_paths_single_arg() { let args = extract_positional_args_for_prompt_line( @@ -2904,6 +3157,9 @@ mod tests { composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); match result { InputResult::Command(cmd) => assert_eq!(cmd.command(), "diff"), + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/diff'") + } InputResult::Submitted(text) => { panic!("expected command dispatch after Tab completion, got literal submit: {text}") } @@ -2937,6 +3193,9 @@ mod tests { InputResult::Command(cmd) => { assert_eq!(cmd.command(), "mention"); } + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/mention'") + } InputResult::Submitted(text) => { panic!("expected command dispatch, but composer submitted literal text: {text}") } @@ -3977,6 +4236,33 @@ mod tests { assert_eq!(InputResult::Submitted(expected), result); } + #[test] + fn pending_first_ascii_char_flushes_as_typed() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = 
unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('h'), KeyModifiers::NONE)); + assert!(composer.is_in_paste_burst()); + assert!(composer.textarea.text().is_empty()); + + std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); + let flushed = composer.flush_paste_burst_if_due(); + assert!(flushed, "expected pending first char to flush"); + assert_eq!(composer.textarea.text(), "h"); + assert!(!composer.is_in_paste_burst()); + } + #[test] fn burst_paste_fast_small_buffers_and_flushes_on_stop() { use crossterm::event::KeyCode; @@ -4011,8 +4297,7 @@ mod tests { composer.textarea.text().is_empty(), "text should remain empty until flush" ); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let flushed = composer.flush_paste_burst_if_due(); + let flushed = flush_after_paste_burst(&mut composer); assert!(flushed, "expected buffered text to flush after stop"); assert_eq!(composer.textarea.text(), "a".repeat(count)); assert!( @@ -4045,8 +4330,7 @@ mod tests { // Nothing should appear until we stop and flush assert!(composer.textarea.text().is_empty()); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let flushed = composer.flush_paste_burst_if_due(); + let flushed = flush_after_paste_burst(&mut composer); assert!(flushed, "expected flush after stopping fast input"); let expected_placeholder = format!("[Pasted Content {count} chars]"); @@ -4109,6 +4393,59 @@ mod tests { assert_eq!(result, InputResult::None); } + #[test] + fn history_navigation_takes_priority_over_popups() { + use codex_protocol::protocol::SkillScope; + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + use tokio::sync::mpsc::unbounded_channel; + + let (tx, _rx) = unbounded_channel::(); + let sender = 
AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + composer.set_skill_mentions(Some(vec![SkillMetadata { + name: "codex-cli-release-notes".to_string(), + description: "example".to_string(), + short_description: None, + path: PathBuf::from("skills/codex-cli-release-notes/SKILL.md"), + scope: SkillScope::Repo, + }])); + + // Seed local history; the newest entry triggers the skills popup. + composer.history.record_local_submission("older"); + composer + .history + .record_local_submission("$codex-cli-release-notes"); + + // First Up recalls "$...", but we should not open the skills popup while browsing history. + let (result, _redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Up, KeyModifiers::NONE)); + assert_eq!(result, InputResult::None); + assert_eq!(composer.textarea.text(), "$codex-cli-release-notes"); + assert!( + matches!(composer.active_popup, ActivePopup::None), + "expected no skills popup while browsing history" + ); + + // Second Up should navigate history again (no popup should interfere). 
+ let (result, _redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Up, KeyModifiers::NONE)); + assert_eq!(result, InputResult::None); + assert_eq!(composer.textarea.text(), "older"); + assert!( + matches!(composer.active_popup, ActivePopup::None), + "expected popup to be dismissed after history navigation" + ); + } + #[test] fn slash_popup_activated_for_bare_slash_and_valid_prefixes() { // use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; @@ -4266,4 +4603,38 @@ mod tests { ); assert_eq!(composer.attached_images.len(), 1); } + #[test] + fn input_disabled_ignores_keypresses_and_hides_cursor() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + composer.set_text_content("hello".to_string()); + composer.set_input_enabled(false, Some("Input disabled for test.".to_string())); + + let (result, needs_redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); + + assert_eq!(result, InputResult::None); + assert!(!needs_redraw); + assert_eq!(composer.current_text(), "hello"); + + let area = Rect { + x: 0, + y: 0, + width: 40, + height: 5, + }; + assert_eq!(composer.cursor_pos(area), None); + } } diff --git a/codex-rs/tui/src/bottom_pane/list_selection_view.rs b/codex-rs/tui/src/bottom_pane/list_selection_view.rs index e387b668613..40787a9c259 100644 --- a/codex-rs/tui/src/bottom_pane/list_selection_view.rs +++ b/codex-rs/tui/src/bottom_pane/list_selection_view.rs @@ -13,6 +13,7 @@ use ratatui::widgets::Block; use ratatui::widgets::Paragraph; use ratatui::widgets::Widget; +use super::selection_popup_common::wrap_styled_line; use crate::app_event_sender::AppEventSender; use crate::key_hint::KeyBinding; use crate::render::Insets; @@ -50,6 +51,7 @@ pub(crate) struct 
SelectionItem { pub(crate) struct SelectionViewParams { pub title: Option, pub subtitle: Option, + pub footer_note: Option>, pub footer_hint: Option>, pub items: Vec, pub is_searchable: bool, @@ -63,6 +65,7 @@ impl Default for SelectionViewParams { Self { title: None, subtitle: None, + footer_note: None, footer_hint: None, items: Vec::new(), is_searchable: false, @@ -74,6 +77,7 @@ impl Default for SelectionViewParams { } pub(crate) struct ListSelectionView { + footer_note: Option>, footer_hint: Option>, items: Vec, state: ScrollState, @@ -101,6 +105,7 @@ impl ListSelectionView { ])); } let mut s = Self { + footer_note: params.footer_note, footer_hint: params.footer_hint, items: params.items, state: ScrollState::new(), @@ -434,6 +439,11 @@ impl Renderable for ListSelectionView { if self.is_searchable { height = height.saturating_add(1); } + if let Some(note) = &self.footer_note { + let note_width = width.saturating_sub(2); + let note_lines = wrap_styled_line(note, note_width); + height = height.saturating_add(note_lines.len() as u16); + } if self.footer_hint.is_some() { height = height.saturating_add(1); } @@ -445,11 +455,15 @@ impl Renderable for ListSelectionView { return; } - let [content_area, footer_area] = Layout::vertical([ - Constraint::Fill(1), - Constraint::Length(if self.footer_hint.is_some() { 1 } else { 0 }), - ]) - .areas(area); + let note_width = area.width.saturating_sub(2); + let note_lines = self + .footer_note + .as_ref() + .map(|note| wrap_styled_line(note, note_width)); + let note_height = note_lines.as_ref().map_or(0, |lines| lines.len() as u16); + let footer_rows = note_height + u16::from(self.footer_hint.is_some()); + let [content_area, footer_area] = + Layout::vertical([Constraint::Fill(1), Constraint::Length(footer_rows)]).areas(area); Block::default() .style(user_message_style()) @@ -517,14 +531,43 @@ impl Renderable for ListSelectionView { ); } - if let Some(hint) = &self.footer_hint { - let hint_area = Rect { - x: footer_area.x + 2, - y: 
footer_area.y, - width: footer_area.width.saturating_sub(2), - height: footer_area.height, - }; - hint.clone().dim().render(hint_area, buf); + if footer_area.height > 0 { + let [note_area, hint_area] = Layout::vertical([ + Constraint::Length(note_height), + Constraint::Length(if self.footer_hint.is_some() { 1 } else { 0 }), + ]) + .areas(footer_area); + + if let Some(lines) = note_lines { + let note_area = Rect { + x: note_area.x + 2, + y: note_area.y, + width: note_area.width.saturating_sub(2), + height: note_area.height, + }; + for (idx, line) in lines.iter().enumerate() { + if idx as u16 >= note_area.height { + break; + } + let line_area = Rect { + x: note_area.x, + y: note_area.y + idx as u16, + width: note_area.width, + height: 1, + }; + line.clone().render(line_area, buf); + } + } + + if let Some(hint) = &self.footer_hint { + let hint_area = Rect { + x: hint_area.x + 2, + y: hint_area.y, + width: hint_area.width.saturating_sub(2), + height: hint_area.height, + }; + hint.clone().dim().render(hint_area, buf); + } } } } @@ -611,6 +654,38 @@ mod tests { assert_snapshot!("list_selection_spacing_with_subtitle", render_lines(&view)); } + #[test] + fn snapshot_footer_note_wraps() { + let (tx_raw, _rx) = unbounded_channel::(); + let tx = AppEventSender::new(tx_raw); + let items = vec![SelectionItem { + name: "Read Only".to_string(), + description: Some("Codex can read files".to_string()), + is_current: true, + dismiss_on_select: true, + ..Default::default() + }]; + let footer_note = Line::from(vec![ + "Note: ".dim(), + "Use /setup-elevated-sandbox".cyan(), + " to allow network access.".dim(), + ]); + let view = ListSelectionView::new( + SelectionViewParams { + title: Some("Select Approval Mode".to_string()), + footer_note: Some(footer_note), + footer_hint: Some(standard_popup_hint_line()), + items, + ..Default::default() + }, + tx, + ); + assert_snapshot!( + "list_selection_footer_note_wraps", + render_lines_with_width(&view, 40) + ); + } + #[test] fn 
renders_search_query_line_when_enabled() {
        let (tx_raw, _rx) = unbounded_channel::();
diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs
index 7634f699aa0..fe626537ac4 100644
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -264,6 +264,16 @@ impl BottomPane {
         self.request_redraw();
     }
 
+    #[allow(dead_code)]
+    pub(crate) fn set_composer_input_enabled(
+        &mut self,
+        enabled: bool,
+        placeholder: Option,
+    ) {
+        self.composer.set_input_enabled(enabled, placeholder);
+        self.request_redraw();
+    }
+
     pub(crate) fn clear_composer_for_ctrl_c(&mut self) {
         self.composer.clear_for_ctrl_c();
         self.request_redraw();
@@ -416,8 +426,8 @@ impl BottomPane {
         self.request_redraw();
     }
 
-    pub(crate) fn set_unified_exec_sessions(&mut self, sessions: Vec) {
-        if self.unified_exec_footer.set_sessions(sessions) {
+    pub(crate) fn set_unified_exec_processes(&mut self, processes: Vec) {
+        if self.unified_exec_footer.set_processes(processes) {
             self.request_redraw();
         }
     }
diff --git a/codex-rs/tui/src/bottom_pane/paste_burst.rs b/codex-rs/tui/src/bottom_pane/paste_burst.rs
index 49377cb21c5..96ed095b8f3 100644
--- a/codex-rs/tui/src/bottom_pane/paste_burst.rs
+++ b/codex-rs/tui/src/bottom_pane/paste_burst.rs
@@ -6,6 +6,12 @@ use std::time::Instant;
 const PASTE_BURST_MIN_CHARS: u16 = 3;
 const PASTE_BURST_CHAR_INTERVAL: Duration = Duration::from_millis(8);
 const PASTE_ENTER_SUPPRESS_WINDOW: Duration = Duration::from_millis(120);
+// Slower paste bursts have been observed in Windows environments, but ideally
+// we want to keep this low.
+#[cfg(not(windows))]
+const PASTE_BURST_ACTIVE_IDLE_TIMEOUT: Duration = Duration::from_millis(8);
+#[cfg(windows)]
+const PASTE_BURST_ACTIVE_IDLE_TIMEOUT: Duration = Duration::from_millis(60);
 
 #[derive(Default)]
 pub(crate) struct PasteBurst {
@@ -52,16 +58,14 @@ impl PasteBurst {
         PASTE_BURST_CHAR_INTERVAL + Duration::from_millis(1)
     }
 
+    #[cfg(test)]
+    pub(crate) fn recommended_active_flush_delay()
-> Duration { + PASTE_BURST_ACTIVE_IDLE_TIMEOUT + Duration::from_millis(1) + } + /// Entry point: decide how to treat a plain char with current timing. pub fn on_plain_char(&mut self, ch: char, now: Instant) -> CharDecision { - match self.last_plain_char_time { - Some(prev) if now.duration_since(prev) <= PASTE_BURST_CHAR_INTERVAL => { - self.consecutive_plain_char_burst = - self.consecutive_plain_char_burst.saturating_add(1) - } - _ => self.consecutive_plain_char_burst = 1, - } - self.last_plain_char_time = Some(now); + self.note_plain_char(now); if self.active { self.burst_window_until = Some(now + PASTE_ENTER_SUPPRESS_WINDOW); @@ -92,6 +96,40 @@ impl PasteBurst { CharDecision::RetainFirstChar } + /// Like on_plain_char(), but never holds the first char. + /// + /// Used for non-ASCII input paths (e.g., IMEs) where holding a character can + /// feel like dropped input, while still allowing burst-based paste detection. + /// + /// Note: This method will only ever return BufferAppend or BeginBuffer. + pub fn on_plain_char_no_hold(&mut self, now: Instant) -> Option { + self.note_plain_char(now); + + if self.active { + self.burst_window_until = Some(now + PASTE_ENTER_SUPPRESS_WINDOW); + return Some(CharDecision::BufferAppend); + } + + if self.consecutive_plain_char_burst >= PASTE_BURST_MIN_CHARS { + return Some(CharDecision::BeginBuffer { + retro_chars: self.consecutive_plain_char_burst.saturating_sub(1), + }); + } + + None + } + + fn note_plain_char(&mut self, now: Instant) { + match self.last_plain_char_time { + Some(prev) if now.duration_since(prev) <= PASTE_BURST_CHAR_INTERVAL => { + self.consecutive_plain_char_burst = + self.consecutive_plain_char_burst.saturating_add(1) + } + _ => self.consecutive_plain_char_burst = 1, + } + self.last_plain_char_time = Some(now); + } + /// Flush the buffered burst if the inter-key timeout has elapsed. 
/// /// Returns Some(String) when either: @@ -102,9 +140,14 @@ impl PasteBurst { /// /// Returns None if the timeout has not elapsed or there is nothing to flush. pub fn flush_if_due(&mut self, now: Instant) -> FlushResult { + let timeout = if self.is_active_internal() { + PASTE_BURST_ACTIVE_IDLE_TIMEOUT + } else { + PASTE_BURST_CHAR_INTERVAL + }; let timed_out = self .last_plain_char_time - .is_some_and(|t| now.duration_since(t) > PASTE_BURST_CHAR_INTERVAL); + .is_some_and(|t| now.duration_since(t) > timeout); if timed_out && self.is_active_internal() { self.active = false; let out = std::mem::take(&mut self.buffer); diff --git a/codex-rs/tui/src/bottom_pane/selection_popup_common.rs b/codex-rs/tui/src/bottom_pane/selection_popup_common.rs index 48adef9b2c9..c1675b01aa3 100644 --- a/codex-rs/tui/src/bottom_pane/selection_popup_common.rs +++ b/codex-rs/tui/src/bottom_pane/selection_popup_common.rs @@ -26,6 +26,17 @@ pub(crate) struct GenericDisplayRow { pub wrap_indent: Option, // optional indent for wrapped lines } +pub(crate) fn wrap_styled_line<'a>(line: &'a Line<'a>, width: u16) -> Vec> { + use crate::wrapping::RtOptions; + use crate::wrapping::word_wrap_line; + + let width = width.max(1) as usize; + let opts = RtOptions::new(width) + .initial_indent(Line::from("")) + .subsequent_indent(Line::from("")); + word_wrap_line(line, opts) +} + fn line_width(line: &Line<'_>) -> usize { line.iter() .map(|span| UnicodeWidthStr::width(span.content.as_ref())) diff --git a/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap b/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap new file mode 100644 index 00000000000..ca571ae0ea4 --- /dev/null +++ b/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap @@ -0,0 +1,14 @@ +--- +source: 
tui/src/bottom_pane/list_selection_view.rs +assertion_line: 683 +expression: "render_lines_with_width(&view, 40)" +--- + + Select Approval Mode + +› 1. Read Only (current) Codex can + read files + + Note: Use /setup-elevated-sandbox to + allow network access. + Press enter to confirm or esc to go ba diff --git a/codex-rs/tui/src/bottom_pane/textarea.rs b/codex-rs/tui/src/bottom_pane/textarea.rs index 2fd415c7f65..4fc673a11de 100644 --- a/codex-rs/tui/src/bottom_pane/textarea.rs +++ b/codex-rs/tui/src/bottom_pane/textarea.rs @@ -63,9 +63,10 @@ impl TextArea { pub fn set_text(&mut self, text: &str) { self.text = text.to_string(); self.cursor_pos = self.cursor_pos.clamp(0, self.text.len()); + self.elements.clear(); + self.cursor_pos = self.clamp_pos_to_nearest_boundary(self.cursor_pos); self.wrap_cache.replace(None); self.preferred_col = None; - self.elements.clear(); self.kill_buffer.clear(); } @@ -735,18 +736,36 @@ impl TextArea { .position(|e| pos > e.range.start && pos < e.range.end) } - fn clamp_pos_to_nearest_boundary(&self, mut pos: usize) -> usize { - if pos > self.text.len() { - pos = self.text.len(); + fn clamp_pos_to_char_boundary(&self, pos: usize) -> usize { + let pos = pos.min(self.text.len()); + if self.text.is_char_boundary(pos) { + return pos; + } + let mut prev = pos; + while prev > 0 && !self.text.is_char_boundary(prev) { + prev -= 1; + } + let mut next = pos; + while next < self.text.len() && !self.text.is_char_boundary(next) { + next += 1; + } + if pos.saturating_sub(prev) <= next.saturating_sub(pos) { + prev + } else { + next } + } + + fn clamp_pos_to_nearest_boundary(&self, pos: usize) -> usize { + let pos = self.clamp_pos_to_char_boundary(pos); if let Some(idx) = self.find_element_containing(pos) { let e = &self.elements[idx]; let dist_start = pos.saturating_sub(e.range.start); let dist_end = e.range.end.saturating_sub(pos); if dist_start <= dist_end { - e.range.start + self.clamp_pos_to_char_boundary(e.range.start) } else { - e.range.end + 
self.clamp_pos_to_char_boundary(e.range.end) } } else { pos @@ -754,6 +773,7 @@ impl TextArea { } fn clamp_pos_for_insertion(&self, pos: usize) -> usize { + let pos = self.clamp_pos_to_char_boundary(pos); // Do not allow inserting into the middle of an element if let Some(idx) = self.find_element_containing(pos) { let e = &self.elements[idx]; @@ -761,9 +781,9 @@ impl TextArea { let dist_start = pos.saturating_sub(e.range.start); let dist_end = e.range.end.saturating_sub(pos); if dist_start <= dist_end { - e.range.start + self.clamp_pos_to_char_boundary(e.range.start) } else { - e.range.end + self.clamp_pos_to_char_boundary(e.range.end) } } else { pos @@ -1041,6 +1061,7 @@ impl TextArea { mod tests { use super::*; // crossterm types are intentionally not imported here to avoid unused warnings + use pretty_assertions::assert_eq; use rand::prelude::*; fn rand_grapheme(rng: &mut rand::rngs::StdRng) -> String { @@ -1133,6 +1154,27 @@ mod tests { assert_eq!(t.cursor(), 5); } + #[test] + fn insert_str_at_clamps_to_char_boundary() { + let mut t = TextArea::new(); + t.insert_str("你"); + t.set_cursor(0); + t.insert_str_at(1, "A"); + assert_eq!(t.text(), "A你"); + assert_eq!(t.cursor(), 1); + } + + #[test] + fn set_text_clamps_cursor_to_char_boundary() { + let mut t = TextArea::new(); + t.insert_str("abcd"); + t.set_cursor(1); + t.set_text("你"); + assert_eq!(t.cursor(), 0); + t.insert_str("a"); + assert_eq!(t.text(), "a你"); + } + #[test] fn delete_backward_and_forward_edges() { let mut t = ta_with("abc"); diff --git a/codex-rs/tui/src/bottom_pane/unified_exec_footer.rs b/codex-rs/tui/src/bottom_pane/unified_exec_footer.rs index a0ea58bed13..736ce011143 100644 --- a/codex-rs/tui/src/bottom_pane/unified_exec_footer.rs +++ b/codex-rs/tui/src/bottom_pane/unified_exec_footer.rs @@ -8,34 +8,34 @@ use crate::live_wrap::take_prefix_by_width; use crate::render::renderable::Renderable; pub(crate) struct UnifiedExecFooter { - sessions: Vec, + processes: Vec, } impl UnifiedExecFooter { 
pub(crate) fn new() -> Self { Self { - sessions: Vec::new(), + processes: Vec::new(), } } - pub(crate) fn set_sessions(&mut self, sessions: Vec) -> bool { - if self.sessions == sessions { + pub(crate) fn set_processes(&mut self, processes: Vec) -> bool { + if self.processes == processes { return false; } - self.sessions = sessions; + self.processes = processes; true } pub(crate) fn is_empty(&self) -> bool { - self.sessions.is_empty() + self.processes.is_empty() } fn render_lines(&self, width: u16) -> Vec> { - if self.sessions.is_empty() || width < 4 { + if self.processes.is_empty() || width < 4 { return Vec::new(); } - let count = self.sessions.len(); + let count = self.processes.len(); let plural = if count == 1 { "" } else { "s" }; let message = format!(" {count} background terminal{plural} running · /ps to view"); let (truncated, _, _) = take_prefix_by_width(&message, width as usize); @@ -72,7 +72,7 @@ mod tests { #[test] fn render_more_sessions() { let mut footer = UnifiedExecFooter::new(); - footer.set_sessions(vec!["rg \"foo\" src".to_string()]); + footer.set_processes(vec!["rg \"foo\" src".to_string()]); let width = 50; let height = footer.desired_height(width); let mut buf = Buffer::empty(Rect::new(0, 0, width, height)); @@ -83,7 +83,7 @@ mod tests { #[test] fn render_many_sessions() { let mut footer = UnifiedExecFooter::new(); - footer.set_sessions((0..123).map(|idx| format!("cmd {idx}")).collect()); + footer.set_processes((0..123).map(|idx| format!("cmd {idx}")).collect()); let width = 50; let height = footer.desired_height(width); let mut buf = Buffer::empty(Rect::new(0, 0, width, height)); diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index f4418ceadb2..be05e9632af 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -63,7 +63,7 @@ use codex_core::protocol::WarningEvent; use codex_core::protocol::WebSearchBeginEvent; use codex_core::protocol::WebSearchEndEvent; use 
codex_core::skills::model::SkillMetadata; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::approvals::ElicitationRequestEvent; use codex_protocol::parse_command::ParsedCommand; @@ -136,7 +136,7 @@ use codex_common::approval_presets::ApprovalPreset; use codex_common::approval_presets::builtin_approval_presets; use codex_core::AuthManager; use codex_core::CodexAuth; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; use codex_file_search::FileMatch; @@ -147,6 +147,7 @@ use strum::IntoEnumIterator; const USER_SHELL_COMMAND_HELP_TITLE: &str = "Prefix a command with ! to run it locally"; const USER_SHELL_COMMAND_HELP_HINT: &str = "Example: !ls"; +const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; // Track information about an in-flight exec command. struct RunningCommand { command: Vec, @@ -154,7 +155,7 @@ struct RunningCommand { source: ExecCommandSource, } -struct UnifiedExecSessionSummary { +struct UnifiedExecProcessSummary { key: String, command_display: String, } @@ -332,7 +333,7 @@ pub(crate) struct ChatWidget { suppressed_exec_calls: HashSet, last_unified_wait: Option, task_complete_pending: bool, - unified_exec_sessions: Vec, + unified_exec_processes: Vec, mcp_startup_status: Option>, // Queue of interruptive UI events deferred during an active write cycle interrupts: InterruptManager, @@ -344,7 +345,7 @@ pub(crate) struct ChatWidget { current_status_header: String, // Previous status header to restore after a transient stream retry. 
retry_status_header: Option, - conversation_id: Option, + thread_id: Option, frame_requester: FrameRequester, // Whether to include the initial welcome banner on session configured show_welcome_banner: bool, @@ -435,7 +436,7 @@ impl ChatWidget { self.bottom_pane .set_history_metadata(event.history_log_id, event.history_entry_count); self.set_skills(None); - self.conversation_id = Some(event.session_id); + self.thread_id = Some(event.session_id); self.current_rollout_path = Some(event.rollout_path.clone()); let initial_messages = event.initial_messages.clone(); let model_for_header = event.model.clone(); @@ -478,7 +479,7 @@ impl ChatWidget { include_logs: bool, ) { // Build a fresh snapshot at the time of opening the note overlay. - let snapshot = self.feedback.snapshot(self.conversation_id); + let snapshot = self.feedback.snapshot(self.thread_id); let rollout = if include_logs { self.current_rollout_path.clone() } else { @@ -801,6 +802,8 @@ impl ChatWidget { fn on_interrupted_turn(&mut self, reason: TurnAbortReason) { // Finalize, log a gentle prompt, and clear running state. 
self.finalize_turn(); + self.unified_exec_processes.clear(); + self.sync_unified_exec_footer(); if reason != TurnAbortReason::ReviewEnded { self.add_to_history(history_cell::new_error_event( @@ -866,7 +869,7 @@ impl ChatWidget { fn on_exec_command_begin(&mut self, ev: ExecCommandBeginEvent) { self.flush_answer_stream_with_separator(); if is_unified_exec_source(ev.source) { - self.track_unified_exec_session_begin(&ev); + self.track_unified_exec_process_begin(&ev); if !is_standard_tool_call(&ev.parsed_cmd) { return; } @@ -885,10 +888,10 @@ impl ChatWidget { fn on_terminal_interaction(&mut self, ev: TerminalInteractionEvent) { self.flush_answer_stream_with_separator(); let command_display = self - .unified_exec_sessions + .unified_exec_processes .iter() - .find(|session| session.key == ev.process_id) - .map(|session| session.command_display.clone()); + .find(|process| process.key == ev.process_id) + .map(|process| process.command_display.clone()); if ev.stdin.is_empty() { // Empty stdin means we are still waiting on background output; keep a live shimmer cell. if let Some(wait_cell) = self.active_cell.as_mut().and_then(|cell| { @@ -896,7 +899,7 @@ impl ChatWidget { .downcast_mut::() }) && wait_cell.matches(command_display.as_deref()) { - // Same session still waiting; update command display if it shows up late. + // Same process still waiting; update command display if it shows up late. 
wait_cell.update_command_display(command_display); self.request_redraw(); return; @@ -965,7 +968,7 @@ impl ChatWidget { fn on_exec_command_end(&mut self, ev: ExecCommandEndEvent) { if is_unified_exec_source(ev.source) { - self.track_unified_exec_session_end(&ev); + self.track_unified_exec_process_end(&ev); if !self.bottom_pane.is_task_running() { return; } @@ -974,20 +977,20 @@ impl ChatWidget { self.defer_or_handle(|q| q.push_exec_end(ev), |s| s.handle_exec_end_now(ev2)); } - fn track_unified_exec_session_begin(&mut self, ev: &ExecCommandBeginEvent) { + fn track_unified_exec_process_begin(&mut self, ev: &ExecCommandBeginEvent) { if ev.source != ExecCommandSource::UnifiedExecStartup { return; } let key = ev.process_id.clone().unwrap_or(ev.call_id.to_string()); let command_display = strip_bash_lc_and_escape(&ev.command); if let Some(existing) = self - .unified_exec_sessions + .unified_exec_processes .iter_mut() - .find(|session| session.key == key) + .find(|process| process.key == key) { existing.command_display = command_display; } else { - self.unified_exec_sessions.push(UnifiedExecSessionSummary { + self.unified_exec_processes.push(UnifiedExecProcessSummary { key, command_display, }); @@ -995,23 +998,23 @@ impl ChatWidget { self.sync_unified_exec_footer(); } - fn track_unified_exec_session_end(&mut self, ev: &ExecCommandEndEvent) { + fn track_unified_exec_process_end(&mut self, ev: &ExecCommandEndEvent) { let key = ev.process_id.clone().unwrap_or(ev.call_id.to_string()); - let before = self.unified_exec_sessions.len(); - self.unified_exec_sessions - .retain(|session| session.key != key); - if self.unified_exec_sessions.len() != before { + let before = self.unified_exec_processes.len(); + self.unified_exec_processes + .retain(|process| process.key != key); + if self.unified_exec_processes.len() != before { self.sync_unified_exec_footer(); } } fn sync_unified_exec_footer(&mut self) { - let sessions = self - .unified_exec_sessions + let processes = self + 
.unified_exec_processes .iter() - .map(|session| session.command_display.clone()) + .map(|process| process.command_display.clone()) .collect(); - self.bottom_pane.set_unified_exec_sessions(sessions); + self.bottom_pane.set_unified_exec_processes(processes); } fn on_mcp_tool_call_begin(&mut self, ev: McpToolCallBeginEvent) { @@ -1399,10 +1402,7 @@ impl ChatWidget { } } - pub(crate) fn new( - common: ChatWidgetInit, - conversation_manager: Arc, - ) -> Self { + pub(crate) fn new(common: ChatWidgetInit, thread_manager: Arc) -> Self { let ChatWidgetInit { config, frame_requester, @@ -1420,7 +1420,7 @@ impl ChatWidget { config.model = Some(model.clone()); let mut rng = rand::rng(); let placeholder = EXAMPLE_PROMPTS[rng.random_range(0..EXAMPLE_PROMPTS.len())].to_string(); - let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), conversation_manager); + let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), thread_manager); let mut widget = Self { app_event_tx: app_event_tx.clone(), @@ -1457,14 +1457,14 @@ impl ChatWidget { suppressed_exec_calls: HashSet::new(), last_unified_wait: None, task_complete_pending: false, - unified_exec_sessions: Vec::new(), + unified_exec_processes: Vec::new(), mcp_startup_status: None, interrupts: InterruptManager::new(), reasoning_buffer: String::new(), full_reasoning_buffer: String::new(), current_status_header: String::from("Working"), retry_status_header: None, - conversation_id: None, + thread_id: None, queued_user_messages: VecDeque::new(), show_welcome_banner: is_first_run, suppress_session_configured_redraw: false, @@ -1486,7 +1486,7 @@ impl ChatWidget { /// Create a ChatWidget attached to an existing conversation (e.g., a fork). 
pub(crate) fn new_from_existing(
         common: ChatWidgetInit,
-        conversation: std::sync::Arc,
+        conversation: std::sync::Arc,
         session_configured: codex_core::protocol::SessionConfiguredEvent,
     ) -> Self {
         let ChatWidgetInit {
@@ -1543,14 +1543,14 @@
             suppressed_exec_calls: HashSet::new(),
             last_unified_wait: None,
             task_complete_pending: false,
-            unified_exec_sessions: Vec::new(),
+            unified_exec_processes: Vec::new(),
             mcp_startup_status: None,
             interrupts: InterruptManager::new(),
             reasoning_buffer: String::new(),
             full_reasoning_buffer: String::new(),
             current_status_header: String::from("Working"),
             retry_status_header: None,
-            conversation_id: None,
+            thread_id: None,
             queued_user_messages: VecDeque::new(),
             show_welcome_banner: false,
             suppress_session_configured_redraw: true,
@@ -1639,6 +1639,9 @@
             InputResult::Command(cmd) => {
                 self.dispatch_command(cmd);
             }
+            InputResult::CommandWithArgs(cmd, args) => {
+                self.dispatch_command_with_args(cmd, args);
+            }
             InputResult::None => {}
         }
     }
@@ -1709,6 +1712,10 @@
             SlashCommand::Resume => {
                 self.app_event_tx.send(AppEvent::OpenResumePicker);
             }
+            SlashCommand::Rename => {
+                self.show_rename_prompt();
+                self.request_redraw();
+            }
             SlashCommand::Init => {
                 let init_target = self.config.cwd.join(DEFAULT_PROJECT_DOC_FILENAME);
                 if init_target.exists() {
@@ -1835,6 +1842,56 @@
         }
     }
 
+    fn dispatch_command_with_args(&mut self, cmd: SlashCommand, args: String) {
+        if !cmd.available_during_task() && self.bottom_pane.is_task_running() {
+            let message = format!(
+                "'/{}' is disabled while a task is in progress.",
+                cmd.command()
+            );
+            self.add_to_history(history_cell::new_error_event(message));
+            self.request_redraw();
+            return;
+        }
+
+        let trimmed = args.trim();
+        match cmd {
+            SlashCommand::Rename if !trimmed.is_empty() => {
+                let name = trimmed.to_string();
+                self.add_info_message(format!("Session renamed to \"{name}\""), None);
+                self.app_event_tx
+                    .send(AppEvent::CodexOp(Op::SetSessionName { name }));
+            }
+            SlashCommand::Review if !trimmed.is_empty() => {
+                self.submit_op(Op::Review {
+                    review_request: ReviewRequest {
+                        target: ReviewTarget::Custom {
+                            instructions: trimmed.to_string(),
+                        },
+                        user_facing_hint: None,
+                    },
+                });
+            }
+            _ => self.dispatch_command(cmd),
+        }
+    }
+
+    fn show_rename_prompt(&mut self) {
+        let tx = self.app_event_tx.clone();
+        let view = CustomPromptView::new(
+            "Rename session".to_string(),
+            "Type a new name and press Enter".to_string(),
+            None,
+            Box::new(move |name: String| {
+                tx.send(AppEvent::InsertHistoryCell(Box::new(
+                    history_cell::new_info_event(format!("Session renamed to \"{name}\""), None),
+                )));
+                tx.send(AppEvent::CodexOp(Op::SetSessionName { name }));
+            }),
+        );
+
+        self.bottom_pane.show_view(Box::new(view));
+    }
+
     pub(crate) fn handle_paste(&mut self, text: String) {
         self.bottom_pane.handle_paste(text);
     }
@@ -1953,7 +2027,10 @@
         }
 
         self.codex_op_tx
-            .send(Op::UserInput { items })
+            .send(Op::UserInput {
+                items,
+                final_output_json_schema: None,
+            })
             .unwrap_or_else(|e| {
                 tracing::error!("failed to send message: {e}");
             });
@@ -2109,6 +2186,7 @@
             }
             EventMsg::ExitedReviewMode(review) => self.on_exited_review_mode(review),
             EventMsg::ContextCompacted(_) => self.on_agent_message("Context compacted".to_owned()),
+            EventMsg::ThreadRolledBack(_) => {}
             EventMsg::RawResponseItem(_)
             | EventMsg::ItemStarted(_)
             | EventMsg::ItemCompleted(_)
@@ -2250,7 +2328,7 @@
self.auth_manager.as_ref(), token_info, total_usage, - &self.conversation_id, + &self.thread_id, self.rate_limit_snapshot.as_ref(), self.plan_type, Local::now(), @@ -2259,12 +2337,12 @@ impl ChatWidget { } pub(crate) fn add_ps_output(&mut self) { - let sessions = self - .unified_exec_sessions + let processes = self + .unified_exec_processes .iter() - .map(|session| session.command_display.clone()) + .map(|process| process.command_display.clone()) .collect(); - self.add_to_history(history_cell::new_unified_exec_sessions_output(sessions)); + self.add_to_history(history_cell::new_unified_exec_processes_output(processes)); } fn stop_rate_limit_poller(&mut self) { @@ -2304,7 +2382,7 @@ impl ChatWidget { let models = self.models_manager.try_list_models(&self.config).ok()?; models .iter() - .find(|preset| preset.model == NUDGE_MODEL_SLUG) + .find(|preset| preset.show_in_picker && preset.model == NUDGE_MODEL_SLUG) .cloned() } @@ -2407,19 +2485,63 @@ impl ChatWidget { /// Open a popup to choose a quick auto model. Selecting "All models" /// opens the full picker with every available preset. pub(crate) fn open_model_popup(&mut self) { - let presets: Vec = - // todo(aibrahim): make this async function - match self.models_manager.try_list_models(&self.config) { - Ok(models) => models, - Err(_) => { - self.add_info_message( - "Models are being updated; please try /model again in a moment." 
- .to_string(), - None, - ); - return; - } - }; + let presets: Vec = match self.models_manager.try_list_models(&self.config) { + Ok(models) => models, + Err(_) => { + self.add_info_message( + "Models are being updated; please try /model again in a moment.".to_string(), + None, + ); + return; + } + }; + self.open_model_popup_with_presets(presets); + } + + fn model_menu_header(&self, title: &str, subtitle: &str) -> Box { + let title = title.to_string(); + let subtitle = subtitle.to_string(); + let mut header = ColumnRenderable::new(); + header.push(Line::from(title.bold())); + header.push(Line::from(subtitle.dim())); + if let Some(warning) = self.model_menu_warning_line() { + header.push(warning); + } + Box::new(header) + } + + fn model_menu_warning_line(&self) -> Option> { + let base_url = self.custom_openai_base_url()?; + let warning = format!( + "Warning: OPENAI_BASE_URL is set to {base_url}. Selecting models may not be supported or work properly." + ); + Some(Line::from(warning.red())) + } + + fn custom_openai_base_url(&self) -> Option { + if !self.config.model_provider.is_openai() { + return None; + } + + let base_url = self.config.model_provider.base_url.as_ref()?; + let trimmed = base_url.trim(); + if trimmed.is_empty() { + return None; + } + + let normalized = trimmed.trim_end_matches('/'); + if normalized == DEFAULT_OPENAI_BASE_URL { + return None; + } + + Some(trimmed.to_string()) + } + + pub(crate) fn open_model_popup_with_presets(&mut self, presets: Vec) { + let presets: Vec = presets + .into_iter() + .filter(|preset| preset.show_in_picker) + .collect(); let current_label = presets .iter() @@ -2483,11 +2605,14 @@ impl ChatWidget { }); } + let header = self.model_menu_header( + "Select Model", + "Pick a quick auto mode or browse all models.", + ); self.bottom_pane.show_selection_view(SelectionViewParams { - title: Some("Select Model".to_string()), - subtitle: Some("Pick a quick auto mode or browse all models.".to_string()), footer_hint: 
Some(standard_popup_hint_line()), items, + header, ..Default::default() }); } @@ -2538,14 +2663,14 @@ impl ChatWidget { }); } + let header = self.model_menu_header( + "Select Model and Effort", + "Access legacy models by running codex -m or in your config.toml", + ); self.bottom_pane.show_selection_view(SelectionViewParams { - title: Some("Select Model and Effort".to_string()), - subtitle: Some( - "Access legacy models by running codex -m or in your config.toml" - .to_string(), - ), footer_hint: Some("Press enter to select reasoning effort, or esc to dismiss.".into()), items, + header, ..Default::default() }); } @@ -3505,8 +3630,8 @@ impl ChatWidget { .unwrap_or_default() } - pub(crate) fn conversation_id(&self) -> Option { - self.conversation_id + pub(crate) fn thread_id(&self) -> Option { + self.thread_id } pub(crate) fn rollout_path(&self) -> Option { diff --git a/codex-rs/tui/src/chatwidget/agent.rs b/codex-rs/tui/src/chatwidget/agent.rs index 240972347fb..d8428b221fa 100644 --- a/codex-rs/tui/src/chatwidget/agent.rs +++ b/codex-rs/tui/src/chatwidget/agent.rs @@ -1,8 +1,8 @@ use std::sync::Arc; -use codex_core::CodexConversation; -use codex_core::ConversationManager; -use codex_core::NewConversation; +use codex_core::CodexThread; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_core::protocol::Event; use codex_core::protocol::EventMsg; @@ -18,17 +18,17 @@ use crate::app_event_sender::AppEventSender; pub(crate) fn spawn_agent( config: Config, app_event_tx: AppEventSender, - server: Arc, + server: Arc, ) -> UnboundedSender { let (codex_op_tx, mut codex_op_rx) = unbounded_channel::(); let app_event_tx_clone = app_event_tx; tokio::spawn(async move { - let NewConversation { - conversation_id: _, - conversation, + let NewThread { + thread, session_configured, - } = match server.new_conversation(config).await { + .. 
+ } = match server.start_thread(config).await { Ok(v) => v, #[allow(clippy::print_stderr)] Err(err) => { @@ -52,17 +52,17 @@ pub(crate) fn spawn_agent( }; app_event_tx_clone.send(AppEvent::CodexEvent(ev)); - let conversation_clone = conversation.clone(); + let thread_clone = thread.clone(); tokio::spawn(async move { while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; + let id = thread_clone.submit(op).await; if let Err(e) = id { tracing::error!("failed to submit op: {e}"); } } }); - while let Ok(event) = conversation.next_event().await { + while let Ok(event) = thread.next_event().await { app_event_tx_clone.send(AppEvent::CodexEvent(event)); } }); @@ -70,11 +70,11 @@ pub(crate) fn spawn_agent( codex_op_tx } -/// Spawn agent loops for an existing conversation (e.g., a forked conversation). +/// Spawn agent loops for an existing thread (e.g., a forked thread). /// Sends the provided `SessionConfiguredEvent` immediately, then forwards subsequent /// events and accepts Ops for submission. 
pub(crate) fn spawn_agent_from_existing( - conversation: std::sync::Arc, + thread: std::sync::Arc, session_configured: codex_core::protocol::SessionConfiguredEvent, app_event_tx: AppEventSender, ) -> UnboundedSender { @@ -89,17 +89,17 @@ pub(crate) fn spawn_agent_from_existing( }; app_event_tx_clone.send(AppEvent::CodexEvent(ev)); - let conversation_clone = conversation.clone(); + let thread_clone = thread.clone(); tokio::spawn(async move { while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; + let id = thread_clone.submit(op).await; if let Err(e) = id { tracing::error!("failed to submit op: {e}"); } } }); - while let Ok(event) = conversation.next_event().await { + while let Ok(event) = thread.next_event().await { app_event_tx_clone.send(AppEvent::CodexEvent(event)); } }); diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap new file mode 100644 index 00000000000..3c256fe9231 --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap @@ -0,0 +1,16 @@ +--- +source: tui/src/chatwidget/tests.rs +expression: contents +--- + + + Would you like to run the following command? + + $ python - <<'PY' + print('hello') + PY + +› 1. Yes, proceed (y) + 2. 
No, and tell Codex what to do differently (esc) + + Press enter to confirm or esc to cancel diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap index ed18675ac39..e394605dcc5 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap @@ -2,6 +2,8 @@ source: tui/src/chatwidget/tests.rs expression: terminal.backend().vt100().screen().contents() --- + + Would you like to make the following edits? Reason: The model wants to apply changes @@ -12,6 +14,7 @@ expression: terminal.backend().vt100().screen().contents() 2 +world › 1. Yes, proceed (y) - 2. No, and tell Codex what to do differently (esc) + 2. Yes, and don't ask again for these files (a) + 3. No, and tell Codex what to do differently (esc) Press enter to confirm or esc to cancel diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__model_picker_filters_hidden_models.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__model_picker_filters_hidden_models.snap new file mode 100644 index 00000000000..56dff7b5f0c --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__model_picker_filters_hidden_models.snap @@ -0,0 +1,11 @@ +--- +source: tui/src/chatwidget/tests.rs +assertion_line: 1989 +expression: popup +--- + Select Model and Effort + Access legacy models by running codex -m or in your config.toml + +› 1. test-visible-model (current) test-visible-model description + + Press enter to select reasoning effort, or esc to dismiss. 
diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index a0ff8d42e9d..cf53b7fac96 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -11,6 +11,7 @@ use codex_core::config::Config; use codex_core::config::ConfigBuilder; use codex_core::config::Constrained; use codex_core::config::ConstraintError; +use codex_core::features::Feature; use codex_core::models_manager::manager::ModelsManager; use codex_core::protocol::AgentMessageDeltaEvent; use codex_core::protocol::AgentMessageEvent; @@ -47,7 +48,7 @@ use codex_core::protocol::UndoCompletedEvent; use codex_core::protocol::UndoStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_core::protocol::WarningEvent; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffortPreset; @@ -101,7 +102,7 @@ fn snapshot(percent: f64) -> RateLimitSnapshot { async fn resumed_initial_messages_render_history() { let (mut chat, mut rx, _ops) = make_chatwidget_manual(None).await; - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let rollout_file = NamedTempFile::new().unwrap(); let configured = codex_core::protocol::SessionConfiguredEvent { session_id: conversation_id, @@ -313,7 +314,7 @@ async fn helpers_are_available_and_do_not_panic() { let tx = AppEventSender::new(tx_raw); let cfg = test_config().await; let resolved_model = ModelsManager::get_model_offline(cfg.model.as_deref()); - let conversation_manager = Arc::new(ConversationManager::with_models_provider( + let thread_manager = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("test"), cfg.model_provider.clone(), )); @@ -326,12 +327,12 @@ async fn helpers_are_available_and_do_not_panic() { initial_images: Vec::new(), enhanced_keys_supported: false, auth_manager, - models_manager: 
conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: codex_feedback::CodexFeedback::new(), is_first_run: true, model: resolved_model, }; - let mut w = ChatWidget::new(init, conversation_manager); + let mut w = ChatWidget::new(init, thread_manager); // Basic construction sanity. let _ = &mut w; } @@ -365,6 +366,7 @@ async fn make_chatwidget_manual( skills: None, }); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")); + let codex_home = cfg.codex_home.clone(); let widget = ChatWidget { app_event_tx, codex_op_tx: op_tx, @@ -373,7 +375,7 @@ async fn make_chatwidget_manual( config: cfg, model: resolved_model.clone(), auth_manager: auth_manager.clone(), - models_manager: Arc::new(ModelsManager::new(auth_manager)), + models_manager: Arc::new(ModelsManager::new(codex_home, auth_manager)), session_header: SessionHeader::new(resolved_model), initial_user_message: None, token_info: None, @@ -387,14 +389,14 @@ async fn make_chatwidget_manual( suppressed_exec_calls: HashSet::new(), last_unified_wait: None, task_complete_pending: false, - unified_exec_sessions: Vec::new(), + unified_exec_processes: Vec::new(), mcp_startup_status: None, interrupts: InterruptManager::new(), reasoning_buffer: String::new(), full_reasoning_buffer: String::new(), current_status_header: String::from("Working"), retry_status_header: None, - conversation_id: None, + thread_id: None, frame_requester: FrameRequester::test_dummy(), show_welcome_banner: true, queued_user_messages: VecDeque::new(), @@ -414,7 +416,10 @@ async fn make_chatwidget_manual( fn set_chatgpt_auth(chat: &mut ChatWidget) { chat.auth_manager = AuthManager::from_auth_for_testing(CodexAuth::create_dummy_chatgpt_auth_for_testing()); - chat.models_manager = Arc::new(ModelsManager::new(chat.auth_manager.clone())); + chat.models_manager = Arc::new(ModelsManager::new( + chat.config.codex_home.clone(), + chat.auth_manager.clone(), + )); } pub(crate) async 
fn make_chatwidget_manual_with_sender() -> ( @@ -1961,6 +1966,41 @@ async fn model_selection_popup_snapshot() { assert_snapshot!("model_selection_popup", popup); } +#[tokio::test] +async fn model_picker_hides_show_in_picker_false_models_from_cache() { + let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("test-visible-model")).await; + let preset = |slug: &str, show_in_picker: bool| ModelPreset { + id: slug.to_string(), + model: slug.to_string(), + display_name: slug.to_string(), + description: format!("{slug} description"), + default_reasoning_effort: ReasoningEffortConfig::Medium, + supported_reasoning_efforts: vec![ReasoningEffortPreset { + effort: ReasoningEffortConfig::Medium, + description: "medium".to_string(), + }], + is_default: false, + upgrade: None, + show_in_picker, + supported_in_api: true, + }; + + chat.open_model_popup_with_presets(vec![ + preset("test-visible-model", true), + preset("test-hidden-model", false), + ]); + let popup = render_bottom_popup(&chat, 80); + assert_snapshot!("model_picker_filters_hidden_models", popup); + assert!( + popup.contains("test-visible-model"), + "expected visible model to appear in picker:\n{popup}" + ); + assert!( + !popup.contains("test-hidden-model"), + "expected hidden model to be excluded from picker:\n{popup}" + ); +} + #[tokio::test] async fn approvals_selection_popup_snapshot() { let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await; @@ -2465,6 +2505,48 @@ async fn approval_modal_exec_without_reason_snapshot() -> anyhow::Result<()> { Ok(()) } +// Snapshot test: approval modal with a proposed execpolicy prefix that is multi-line; +// we should not offer adding it to execpolicy. 
+#[tokio::test] +async fn approval_modal_exec_multiline_prefix_hides_execpolicy_option_snapshot() +-> anyhow::Result<()> { + let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await; + chat.config.approval_policy.set(AskForApproval::OnRequest)?; + + let script = "python - <<'PY'\nprint('hello')\nPY".to_string(); + let command = vec!["bash".into(), "-lc".into(), script]; + let ev = ExecApprovalRequestEvent { + call_id: "call-approve-cmd-multiline-trunc".into(), + turn_id: "turn-approve-cmd-multiline-trunc".into(), + command: command.clone(), + cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)), + parsed_cmd: vec![], + }; + chat.handle_codex_event(Event { + id: "sub-approve-multiline-trunc".into(), + msg: EventMsg::ExecApprovalRequest(ev), + }); + + let width = 100; + let height = chat.desired_height(width); + let mut terminal = + ratatui::Terminal::new(VT100Backend::new(width, height)).expect("create terminal"); + terminal.set_viewport_area(Rect::new(0, 0, width, height)); + terminal + .draw(|f| chat.render(f.area(), f.buffer_mut())) + .expect("draw approval modal (multiline prefix)"); + let contents = terminal.backend().vt100().screen().contents(); + assert!(!contents.contains("don't ask again")); + assert_snapshot!( + "approval_modal_exec_multiline_prefix_no_execpolicy", + contents + ); + + Ok(()) +} + // Snapshot test: patch approval modal #[tokio::test] async fn approval_modal_patch_snapshot() -> anyhow::Result<()> { @@ -2580,6 +2662,26 @@ async fn interrupt_prepends_queued_messages_before_existing_composer_text() { let _ = drain_insert_history(&mut rx); } +#[tokio::test] +async fn interrupt_clears_unified_exec_processes() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + + begin_unified_exec_startup(&mut chat, "call-1", "process-1", "sleep 5"); + begin_unified_exec_startup(&mut chat, "call-2", "process-2", "sleep 6"); + 
assert_eq!(chat.unified_exec_processes.len(), 2); + + chat.handle_codex_event(Event { + id: "turn-1".into(), + msg: EventMsg::TurnAborted(codex_core::protocol::TurnAbortedEvent { + reason: TurnAbortReason::Interrupted, + }), + }); + + assert!(chat.unified_exec_processes.is_empty()); + + let _ = drain_insert_history(&mut rx); +} + // Snapshot test: ChatWidget at very small heights (idle) // Ensures overall layout behaves when terminal height is extremely constrained. #[tokio::test] diff --git a/codex-rs/tui/src/diff_render.rs b/codex-rs/tui/src/diff_render.rs index ff0fb509b8a..505ebc8edcf 100644 --- a/codex-rs/tui/src/diff_render.rs +++ b/codex-rs/tui/src/diff_render.rs @@ -299,7 +299,18 @@ fn render_change(change: &FileChange, out: &mut Vec>, width: usi } } +/// Format a path for display relative to the current working directory when +/// possible, keeping output stable in jj/no-`.git` workspaces (e.g. image +/// tool calls should show `example.png` instead of an absolute path). pub(crate) fn display_path_for(path: &Path, cwd: &Path) -> String { + if path.is_relative() { + return path.display().to_string(); + } + + if let Ok(stripped) = path.strip_prefix(cwd) { + return stripped.display().to_string(); + } + let path_in_same_repo = match (get_git_repo_root(cwd), get_git_repo_root(path)) { (Some(cwd_repo), Some(path_repo)) => cwd_repo == path_repo, _ => false, @@ -420,6 +431,7 @@ fn style_del() -> Style { mod tests { use super::*; use insta::assert_snapshot; + use pretty_assertions::assert_eq; use ratatui::Terminal; use ratatui::backend::TestBackend; use ratatui::text::Text; @@ -459,6 +471,26 @@ mod tests { assert_snapshot!(name, text); } + #[test] + fn display_path_prefers_cwd_without_git_repo() { + let cwd = if cfg!(windows) { + PathBuf::from(r"C:\workspace\codex") + } else { + PathBuf::from("/workspace/codex") + }; + let path = cwd.join("tui").join("example.png"); + + let rendered = display_path_for(&path, &cwd); + + assert_eq!( + rendered, + PathBuf::from("tui") 
+ .join("example.png") + .display() + .to_string() + ); + } + #[test] fn ui_snapshot_wrap_behavior_insert() { // Narrow width to force wrapping within our diff line rendering diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index b487c6aeff5..4e16306d132 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -517,29 +517,29 @@ pub(crate) fn new_unified_exec_wait_live( } #[derive(Debug)] -struct UnifiedExecSessionsCell { - sessions: Vec, +struct UnifiedExecProcessesCell { + processes: Vec, } -impl UnifiedExecSessionsCell { - fn new(sessions: Vec) -> Self { - Self { sessions } +impl UnifiedExecProcessesCell { + fn new(processes: Vec) -> Self { + Self { processes } } } -impl HistoryCell for UnifiedExecSessionsCell { +impl HistoryCell for UnifiedExecProcessesCell { fn display_lines(&self, width: u16) -> Vec> { if width == 0 { return Vec::new(); } let wrap_width = width as usize; - let max_sessions = 16usize; + let max_processes = 16usize; let mut out: Vec> = Vec::new(); out.push(vec!["Background terminals".bold()].into()); out.push("".into()); - if self.sessions.is_empty() { + if self.processes.is_empty() { out.push(" • No background terminals running.".italic().into()); return out; } @@ -549,8 +549,8 @@ impl HistoryCell for UnifiedExecSessionsCell { let truncation_suffix = " [...]"; let truncation_suffix_width = UnicodeWidthStr::width(truncation_suffix); let mut shown = 0usize; - for command in &self.sessions { - if shown >= max_sessions { + for command in &self.processes { + if shown >= max_processes { break; } let (snippet, snippet_truncated) = { @@ -590,7 +590,7 @@ impl HistoryCell for UnifiedExecSessionsCell { shown += 1; } - let remaining = self.sessions.len().saturating_sub(shown); + let remaining = self.processes.len().saturating_sub(shown); if remaining > 0 { let more_text = format!("... 
and {remaining} more running"); if wrap_width <= prefix_width { @@ -610,9 +610,9 @@ impl HistoryCell for UnifiedExecSessionsCell { } } -pub(crate) fn new_unified_exec_sessions_output(sessions: Vec) -> CompositeHistoryCell { +pub(crate) fn new_unified_exec_processes_output(processes: Vec) -> CompositeHistoryCell { let command = PlainHistoryCell::new(vec!["/ps".magenta().into()]); - let summary = UnifiedExecSessionsCell::new(sessions); + let summary = UnifiedExecProcessesCell::new(processes); CompositeHistoryCell::new(vec![Box::new(command), Box::new(summary)]) } @@ -816,11 +816,11 @@ pub(crate) fn padded_emoji(emoji: &str) -> String { #[derive(Debug)] struct TooltipHistoryCell { - tip: &'static str, + tip: String, } impl TooltipHistoryCell { - fn new(tip: &'static str) -> Self { + fn new(tip: String) -> Self { Self { tip } } } @@ -1819,14 +1819,14 @@ mod tests { #[test] fn ps_output_empty_snapshot() { - let cell = new_unified_exec_sessions_output(Vec::new()); + let cell = new_unified_exec_processes_output(Vec::new()); let rendered = render_lines(&cell.display_lines(60)).join("\n"); insta::assert_snapshot!(rendered); } #[test] fn ps_output_multiline_snapshot() { - let cell = new_unified_exec_sessions_output(vec![ + let cell = new_unified_exec_processes_output(vec![ "echo hello\nand then some extra text".to_string(), "rg \"foo\" src".to_string(), ]); @@ -1836,7 +1836,7 @@ mod tests { #[test] fn ps_output_long_command_snapshot() { - let cell = new_unified_exec_sessions_output(vec![String::from( + let cell = new_unified_exec_processes_output(vec![String::from( "rg \"foo\" src --glob '**/*.rs' --max-count 1000 --no-ignore --hidden --follow --glob '!target/**'", )]); let rendered = render_lines(&cell.display_lines(36)).join("\n"); @@ -1845,8 +1845,9 @@ mod tests { #[test] fn ps_output_many_sessions_snapshot() { - let cell = - new_unified_exec_sessions_output((0..20).map(|idx| format!("command {idx}")).collect()); + let cell = new_unified_exec_processes_output( + 
(0..20).map(|idx| format!("command {idx}")).collect(), + ); let rendered = render_lines(&cell.display_lines(32)).join("\n"); insta::assert_snapshot!(rendered); } diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 6b784affce6..6f4faaad659 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -19,7 +19,7 @@ use codex_core::config::ConfigOverrides; use codex_core::config::find_codex_home; use codex_core::config::load_config_as_toml_with_cli_overrides; use codex_core::config::resolve_oss_provider; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; use codex_core::get_platform_sandbox; use codex_core::protocol::AskForApproval; use codex_protocol::config_types::SandboxMode; @@ -97,7 +97,6 @@ pub use markdown_render::render_markdown_text; pub use public_widgets::composer_input::ComposerAction; pub use public_widgets::composer_input::ComposerInput; use std::io::Write as _; - // (tests access modules directly within the crate) pub async fn run_main( @@ -344,6 +343,8 @@ async fn run_ratatui_app( ) -> color_eyre::Result { color_eyre::install()?; + tooltips::announcement::prewarm(); + // Forward panic reports through tracing so they appear in the UI status // line, but do not swallow the default/color-eyre panic handler. // Chain to the previous hook so users still get a rich panic report @@ -370,7 +371,7 @@ async fn run_ratatui_app( crate::tui::restore()?; return Ok(AppExitInfo { token_usage: codex_core::protocol::TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: Some(action), }); } @@ -409,7 +410,7 @@ async fn run_ratatui_app( let _ = tui.terminal.clear(); return Ok(AppExitInfo { token_usage: codex_core::protocol::TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: None, }); } @@ -429,7 +430,7 @@ async fn run_ratatui_app( // Determine resume behavior: explicit id, then resume last, then picker. 
let resume_selection = if let Some(id_str) = cli.resume_session_id.as_deref() { - match find_conversation_path_by_id_str(&config.codex_home, id_str).await? { + match find_thread_path_by_id_str(&config.codex_home, id_str).await? { Some(path) => resume_picker::ResumeSelection::Resume(path), None => { error!("Error finding conversation path: {id_str}"); @@ -444,14 +445,14 @@ async fn run_ratatui_app( } return Ok(AppExitInfo { token_usage: codex_core::protocol::TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: None, }); } } } else if cli.resume_last { let provider_filter = vec![config.model_provider_id.clone()]; - match RolloutRecorder::list_conversations( + match RolloutRecorder::list_threads( &config.codex_home, 1, None, @@ -482,7 +483,7 @@ async fn run_ratatui_app( session_log::log_session_end(); return Ok(AppExitInfo { token_usage: codex_core::protocol::TokenUsage::default(), - conversation_id: None, + thread_id: None, update_action: None, }); } diff --git a/codex-rs/tui/src/onboarding/auth.rs b/codex-rs/tui/src/onboarding/auth.rs index 6307e6e7dc9..9de8fd17093 100644 --- a/codex-rs/tui/src/onboarding/auth.rs +++ b/codex-rs/tui/src/onboarding/auth.rs @@ -279,6 +279,12 @@ impl AuthModeWidget { lines.push("".into()); lines.push(Line::from(state.auth_url.as_str().cyan().underlined())); lines.push("".into()); + lines.push(Line::from(vec![ + " On a remote or headless machine? 
Use ".into(), + "codex login --device-auth".cyan(), + " instead".into(), + ])); + lines.push("".into()); } lines.push(" Press Esc to cancel".dim().into()); diff --git a/codex-rs/tui/src/resume_picker.rs b/codex-rs/tui/src/resume_picker.rs index 0f55bb5e0d8..13f7b9db7af 100644 --- a/codex-rs/tui/src/resume_picker.rs +++ b/codex-rs/tui/src/resume_picker.rs @@ -5,11 +5,11 @@ use std::sync::Arc; use chrono::DateTime; use chrono::Utc; -use codex_core::ConversationItem; -use codex_core::ConversationsPage; use codex_core::Cursor; use codex_core::INTERACTIVE_SESSION_SOURCES; use codex_core::RolloutRecorder; +use codex_core::ThreadItem; +use codex_core::ThreadsPage; use codex_core::path_utils; use codex_protocol::items::TurnItem; use color_eyre::eyre::Result; @@ -61,7 +61,7 @@ enum BackgroundEvent { PageLoaded { request_token: usize, search_token: Option, - page: std::io::Result, + page: std::io::Result, }, } @@ -89,7 +89,7 @@ pub async fn run_resume_picker( let tx = loader_tx.clone(); tokio::spawn(async move { let provider_filter = vec![request.default_provider.clone()]; - let page = RolloutRecorder::list_conversations( + let page = RolloutRecorder::list_threads( &request.codex_home, PAGE_SIZE, request.cursor.as_ref(), @@ -415,7 +415,7 @@ impl PickerState { self.pagination.loading = LoadingState::Idle; } - fn ingest_page(&mut self, page: ConversationsPage) { + fn ingest_page(&mut self, page: ThreadsPage) { if let Some(cursor) = page.next_cursor.clone() { self.pagination.next_cursor = Some(cursor); } else { @@ -627,11 +627,11 @@ impl PickerState { } } -fn rows_from_items(items: Vec) -> Vec { +fn rows_from_items(items: Vec) -> Vec { items.into_iter().map(|item| head_to_row(&item)).collect() } -fn head_to_row(item: &ConversationItem) -> Row { +fn head_to_row(item: &ThreadItem) -> Row { let created_at = item .created_at .as_deref() @@ -1077,8 +1077,8 @@ mod tests { ] } - fn make_item(path: &str, ts: &str, preview: &str) -> ConversationItem { - ConversationItem { + fn 
make_item(path: &str, ts: &str, preview: &str) -> ThreadItem { + ThreadItem { path: PathBuf::from(path), head: head_with_ts_and_user_text(ts, &[preview]), created_at: Some(ts.to_string()), @@ -1092,12 +1092,12 @@ mod tests { } fn page( - items: Vec, + items: Vec, next_cursor: Option, num_scanned_files: usize, reached_scan_cap: bool, - ) -> ConversationsPage { - ConversationsPage { + ) -> ThreadsPage { + ThreadsPage { items, next_cursor, num_scanned_files, @@ -1144,13 +1144,13 @@ mod tests { #[test] fn rows_from_items_preserves_backend_order() { // Construct two items with different timestamps and real user text. - let a = ConversationItem { + let a = ThreadItem { path: PathBuf::from("/tmp/a.jsonl"), head: head_with_ts_and_user_text("2025-01-01T00:00:00Z", &["A"]), created_at: Some("2025-01-01T00:00:00Z".into()), updated_at: Some("2025-01-01T00:00:00Z".into()), }; - let b = ConversationItem { + let b = ThreadItem { path: PathBuf::from("/tmp/b.jsonl"), head: head_with_ts_and_user_text("2025-01-02T00:00:00Z", &["B"]), created_at: Some("2025-01-02T00:00:00Z".into()), @@ -1166,7 +1166,7 @@ mod tests { #[test] fn row_uses_tail_timestamp_for_updated_at() { let head = head_with_ts_and_user_text("2025-01-01T00:00:00Z", &["Hello"]); - let item = ConversationItem { + let item = ThreadItem { path: PathBuf::from("/tmp/a.jsonl"), head, created_at: Some("2025-01-01T00:00:00Z".into()), @@ -1351,7 +1351,7 @@ mod tests { None, ); - let page = RolloutRecorder::list_conversations( + let page = RolloutRecorder::list_threads( &state.codex_home, PAGE_SIZE, None, diff --git a/codex-rs/tui/src/slash_command.rs b/codex-rs/tui/src/slash_command.rs index c6bd8a771e3..c4c09500138 100644 --- a/codex-rs/tui/src/slash_command.rs +++ b/codex-rs/tui/src/slash_command.rs @@ -19,6 +19,7 @@ pub enum SlashCommand { Review, New, Resume, + Rename, Init, Compact, // Undo, @@ -45,6 +46,7 @@ impl SlashCommand { SlashCommand::Compact => "summarize conversation to prevent hitting the context limit", 
SlashCommand::Review => "review my current changes and find issues", SlashCommand::Resume => "resume a saved chat", + SlashCommand::Rename => "rename the current session", // SlashCommand::Undo => "ask Codex to undo a turn", SlashCommand::Quit | SlashCommand::Exit => "exit Codex", SlashCommand::Diff => "show git diff (including untracked files)", @@ -83,6 +85,7 @@ impl SlashCommand { | SlashCommand::Logout => false, SlashCommand::Diff | SlashCommand::Mention + | SlashCommand::Rename | SlashCommand::Skills | SlashCommand::Status | SlashCommand::Ps diff --git a/codex-rs/tui/src/snapshots/codex_tui__app__tests__model_migration_prompt_shows_for_hidden_model.snap b/codex-rs/tui/src/snapshots/codex_tui__app__tests__model_migration_prompt_shows_for_hidden_model.snap new file mode 100644 index 00000000000..9016aebea84 --- /dev/null +++ b/codex-rs/tui/src/snapshots/codex_tui__app__tests__model_migration_prompt_shows_for_hidden_model.snap @@ -0,0 +1,10 @@ +--- +source: tui/src/app.rs +assertion_line: 1579 +expression: model_migration_copy_to_plain_text(©) +--- +Codex just got an upgrade. Introducing gpt-5.2-codex. + +Codex is now powered by gpt-5.2-codex, our latest frontier agentic coding model. It is smarter and faster than its predecessors and capable of long-running project-scale work. Learn more about gpt-5.2-codex at https://openai.com/index/introducing-gpt-5-2-codex + +You can continue using gpt-5.1-codex if you prefer. 
diff --git a/codex-rs/tui/src/status/card.rs b/codex-rs/tui/src/status/card.rs index 07cd5a19888..7bf066eebfe 100644 --- a/codex-rs/tui/src/status/card.rs +++ b/codex-rs/tui/src/status/card.rs @@ -11,7 +11,7 @@ use codex_core::protocol::NetworkAccess; use codex_core::protocol::SandboxPolicy; use codex_core::protocol::TokenUsage; use codex_core::protocol::TokenUsageInfo; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use ratatui::prelude::*; use ratatui::style::Stylize; @@ -74,7 +74,7 @@ pub(crate) fn new_status_output( auth_manager: &AuthManager, token_info: Option<&TokenUsageInfo>, total_usage: &TokenUsage, - session_id: &Option, + session_id: &Option, rate_limits: Option<&RateLimitSnapshotDisplay>, plan_type: Option, now: DateTime, @@ -103,7 +103,7 @@ impl StatusHistoryCell { auth_manager: &AuthManager, token_info: Option<&TokenUsageInfo>, total_usage: &TokenUsage, - session_id: &Option, + session_id: &Option, rate_limits: Option<&RateLimitSnapshotDisplay>, plan_type: Option, now: DateTime, diff --git a/codex-rs/tui/src/status/tests.rs b/codex-rs/tui/src/status/tests.rs index c6f6c735995..bb16f13328e 100644 --- a/codex-rs/tui/src/status/tests.rs +++ b/codex-rs/tui/src/status/tests.rs @@ -38,9 +38,8 @@ fn test_auth_manager(config: &Config) -> AuthManager { } fn token_info_for(model_slug: &str, config: &Config, usage: &TokenUsage) -> TokenUsageInfo { - let context_window = ModelsManager::construct_model_family_offline(model_slug, config) - .context_window - .or(config.model_context_window); + let context_window = + ModelsManager::construct_model_info_offline(model_slug, config).context_window; TokenUsageInfo { total_token_usage: usage.clone(), last_token_usage: usage.clone(), diff --git a/codex-rs/tui/src/tooltips.rs b/codex-rs/tui/src/tooltips.rs index 8a3a1c1e969..402a18ba2b1 100644 --- a/codex-rs/tui/src/tooltips.rs +++ b/codex-rs/tui/src/tooltips.rs @@ -2,15 +2,10 @@ use codex_core::features::FEATURES; 
use lazy_static::lazy_static; use rand::Rng; +const ANNOUNCEMENT_TIP_URL: &str = + "https://raw.githubusercontent.com/openai/codex/main/announcement_tip.toml"; const RAW_TOOLTIPS: &str = include_str!("../tooltips.txt"); -fn beta_tooltips() -> Vec<&'static str> { - FEATURES - .iter() - .filter_map(|spec| spec.stage.beta_announcement()) - .collect() -} - lazy_static! { static ref TOOLTIPS: Vec<&'static str> = RAW_TOOLTIPS .lines() @@ -25,9 +20,20 @@ lazy_static! { }; } -pub(crate) fn random_tooltip() -> Option<&'static str> { +fn beta_tooltips() -> Vec<&'static str> { + FEATURES + .iter() + .filter_map(|spec| spec.stage.beta_announcement()) + .collect() +} + +/// Pick a random tooltip to show to the user when starting Codex. +pub(crate) fn random_tooltip() -> Option { + if let Some(announcement) = announcement::fetch_announcement_tip() { + return Some(announcement); + } let mut rng = rand::rng(); - pick_tooltip(&mut rng) + pick_tooltip(&mut rng).map(str::to_string) } fn pick_tooltip(rng: &mut R) -> Option<&'static str> { @@ -40,9 +46,149 @@ fn pick_tooltip(rng: &mut R) -> Option<&'static str> { } } +pub(crate) mod announcement { + use crate::tooltips::ANNOUNCEMENT_TIP_URL; + use crate::version::CODEX_CLI_VERSION; + use chrono::NaiveDate; + use chrono::Utc; + use regex_lite::Regex; + use serde::Deserialize; + use std::sync::OnceLock; + use std::thread; + use std::time::Duration; + + static ANNOUNCEMENT_TIP: OnceLock> = OnceLock::new(); + + /// Prewarm the cache of the announcement tip. + pub(crate) fn prewarm() { + let _ = thread::spawn(|| ANNOUNCEMENT_TIP.get_or_init(init_announcement_tip_in_thread)); + } + + /// Fetch the announcement tip, return None if the prewarm is not done yet. 
+ pub(crate) fn fetch_announcement_tip() -> Option { + ANNOUNCEMENT_TIP + .get() + .cloned() + .flatten() + .and_then(|raw| parse_announcement_tip_toml(&raw)) + } + + #[derive(Debug, Deserialize)] + struct AnnouncementTipRaw { + content: String, + from_date: Option, + to_date: Option, + version_regex: Option, + target_app: Option, + } + + #[derive(Debug, Deserialize)] + struct AnnouncementTipDocument { + announcements: Vec, + } + + #[derive(Debug)] + struct AnnouncementTip { + content: String, + from_date: Option, + to_date: Option, + version_regex: Option, + target_app: String, + } + + fn init_announcement_tip_in_thread() -> Option { + thread::spawn(blocking_init_announcement_tip) + .join() + .ok() + .flatten() + } + + fn blocking_init_announcement_tip() -> Option { + let response = reqwest::blocking::Client::new() + .get(ANNOUNCEMENT_TIP_URL) + .timeout(Duration::from_millis(2000)) + .send() + .ok()?; + response.error_for_status().ok()?.text().ok() + } + + pub(crate) fn parse_announcement_tip_toml(text: &str) -> Option { + let announcements = toml::from_str::(text) + .map(|doc| doc.announcements) + .or_else(|_| toml::from_str::>(text)) + .ok()?; + + let mut latest_match = None; + let today = Utc::now().date_naive(); + for raw in announcements { + let Some(tip) = AnnouncementTip::from_raw(raw) else { + continue; + }; + if tip.version_matches(CODEX_CLI_VERSION) + && tip.date_matches(today) + && tip.target_app == "cli" + { + latest_match = Some(tip.content); + } + } + latest_match + } + + impl AnnouncementTip { + fn from_raw(raw: AnnouncementTipRaw) -> Option { + let content = raw.content.trim(); + if content.is_empty() { + return None; + } + + let from_date = match raw.from_date { + Some(date) => Some(NaiveDate::parse_from_str(&date, "%Y-%m-%d").ok()?), + None => None, + }; + let to_date = match raw.to_date { + Some(date) => Some(NaiveDate::parse_from_str(&date, "%Y-%m-%d").ok()?), + None => None, + }; + let version_regex = match raw.version_regex { + Some(pattern) 
=> Some(Regex::new(&pattern).ok()?), + None => None, + }; + + Some(Self { + content: content.to_string(), + from_date, + to_date, + version_regex, + target_app: raw.target_app.unwrap_or("cli".to_string()).to_lowercase(), + }) + } + + fn version_matches(&self, version: &str) -> bool { + self.version_regex + .as_ref() + .is_none_or(|regex| regex.is_match(version)) + } + + fn date_matches(&self, today: NaiveDate) -> bool { + if let Some(from) = self.from_date + && today < from + { + return false; + } + if let Some(to) = self.to_date + && today >= to + { + return false; + } + true + } + } +} + #[cfg(test)] mod tests { use super::*; + use crate::tooltips::announcement::parse_announcement_tip_toml; use rand::SeedableRng; use rand::rngs::StdRng; @@ -62,4 +208,104 @@ mod tests { let mut rng = StdRng::seed_from_u64(7); assert_eq!(expected, pick_tooltip(&mut rng)); } + + #[test] + fn announcement_tip_toml_picks_last_matching() { + let toml = r#" +[[announcements]] +content = "first" +from_date = "2000-01-01" + +[[announcements]] +content = "latest match" +version_regex = ".*" +target_app = "cli" + +[[announcements]] +content = "should not match" +to_date = "2000-01-01" + "#; + + assert_eq!( + Some("latest match".to_string()), + parse_announcement_tip_toml(toml) + ); + + let toml = r#" +[[announcements]] +content = "first" +from_date = "2000-01-01" +target_app = "cli" + +[[announcements]] +content = "latest match" +version_regex = ".*" + +[[announcements]] +content = "should not match" +to_date = "2000-01-01" + "#; + + assert_eq!( + Some("latest match".to_string()), + parse_announcement_tip_toml(toml) + ); + } + + #[test] + fn announcement_tip_toml_picks_no_match() { + let toml = r#" +[[announcements]] +content = "first" +from_date = "2000-01-01" +to_date = "2000-01-05" + +[[announcements]] +content = "latest match" +version_regex = "invalid_version_name" + +[[announcements]] +content = "should not match either " +target_app = "vsce" + "#; + + assert_eq!(None, 
parse_announcement_tip_toml(toml)); + } + + #[test] + fn announcement_tip_toml_bad_deserialization() { + let toml = r#" +[[announcements]] +content = 123 +from_date = "2000-01-01" + "#; + + assert_eq!(None, parse_announcement_tip_toml(toml)); + } + + #[test] + fn announcement_tip_toml_parse_comments() { + let toml = r#" +# Example announcement tips for Codex TUI. +# Each [[announcements]] entry is evaluated in order; the last matching one is shown. +# Dates are UTC, formatted as YYYY-MM-DD. The from_date is inclusive and the to_date is exclusive. +# version_regex matches against the CLI version (env!("CARGO_PKG_VERSION")); omit to apply to all versions. +# target_app specify which app should display the announcement (cli, vsce, ...). + +[[announcements]] +content = "Welcome to Codex! Check out the new onboarding flow." +from_date = "2024-10-01" +to_date = "2024-10-15" +target_app = "cli" +version_regex = "^0\\.0\\.0$" + +[[announcements]] +content = "This is a test announcement" + "#; + + assert_eq!( + Some("This is a test announcement".to_string()), + parse_announcement_tip_toml(toml) + ); + } } diff --git a/codex-rs/tui2/Cargo.toml b/codex-rs/tui2/Cargo.toml index eb4e9cebde5..3108e5561e5 100644 --- a/codex-rs/tui2/Cargo.toml +++ b/codex-rs/tui2/Cargo.toml @@ -62,6 +62,7 @@ ratatui = { workspace = true, features = [ "unstable-rendered-line-info", "unstable-widget-ref", ] } +ratatui-core = { workspace = true } ratatui-macros = { workspace = true } regex-lite = { workspace = true } reqwest = { version = "0.12", features = ["json"] } @@ -73,6 +74,7 @@ strum_macros = { workspace = true } supports-color = { workspace = true } tempfile = { workspace = true } textwrap = { workspace = true } +tui-scrollbar = { workspace = true } tokio = { workspace = true, features = [ "io-std", "macros", diff --git a/codex-rs/tui2/docs/streaming_wrapping_design.md b/codex-rs/tui2/docs/streaming_wrapping_design.md index 28937c377ff..f7af4ccccba 100644 --- 
a/codex-rs/tui2/docs/streaming_wrapping_design.md +++ b/codex-rs/tui2/docs/streaming_wrapping_design.md @@ -1,85 +1,169 @@ -# Streaming Markdown Wrapping & Animation – TUI2 Notes - -This document mirrors the original `tui/streaming_wrapping_design.md` and -captures how the same concerns apply to the new `tui2` crate. It exists so that -future viewport and streaming work in TUI2 can rely on the same context without -having to cross‑reference the legacy TUI implementation. - -At a high level, the design constraints are the same: - -- Streaming agent responses are rendered incrementally, with an animation loop - that reveals content over time. -- Non‑streaming history cells are rendered width‑agnostically and wrapped only - at display time, so they reflow correctly when the terminal is resized. -- Streaming content should eventually follow the same “wrap on display” model so - the transcript reflows consistently across width changes, without regressing - animation or markdown semantics. - -## 1. Where streaming is implemented in TUI2 - -TUI2 keeps the streaming pipeline conceptually aligned with the legacy TUI but -in a separate crate: - -- `tui2/src/markdown_stream.rs` implements the markdown streaming collector and - animation controller for agent deltas. -- `tui2/src/chatwidget.rs` integrates streamed content into the transcript via - `HistoryCell` implementations. -- `tui2/src/history_cell.rs` provides the concrete history cell types used by - the inline transcript and overlays. -- `tui2/src/wrapping.rs` contains the shared text wrapping utilities used by - both streaming and non‑streaming render paths: - - `RtOptions` describes viewport‑aware wrapping (width, indents, algorithm). - - `word_wrap_line`, `word_wrap_lines`, and `word_wrap_lines_borrowed` provide - span‑aware wrapping that preserves markdown styling and emoji width. 
- -As in the original TUI, the key tension is between: - -- **Pre‑wrapping streamed content at commit time** (simpler animation, but - baked‑in splits that don’t reflow), and -- **Deferring wrapping to render time** (better reflow, but requires a more - sophisticated streaming cell model or recomputation on each frame). - -## 2. Current behavior and limitations - -TUI2 is intentionally conservative for now: - -- Streaming responses use the same markdown streaming and wrapping utilities as - the legacy TUI, with width decisions made near the streaming collector. -- The transcript viewport (`App::render_transcript_cells` in - `tui2/src/app.rs`) always uses `word_wrap_lines_borrowed` against the - current `Rect` width, so: - - Non‑streaming cells reflow naturally on resize. - - Streamed cells respect whatever wrapping was applied when their lines were - constructed, and may not fully “un‑wrap” if that work happened at a fixed - width earlier in the pipeline. - -This means TUI2 shares the same fundamental limitation documented in the -original design note: streamed paragraphs can retain historical wrap decisions -made at the time they were streamed, even if the viewport later grows wider. - -## 3. Design directions (forward‑looking) - -The options outlined in the legacy document apply here as well: - -1. **Keep the current behavior but clarify tests and documentation.** - - Ensure tests in `tui2/src/markdown_stream.rs`, `tui2/src/markdown_render.rs`, - `tui2/src/history_cell.rs`, and `tui2/src/wrapping.rs` encode the current - expectations around streaming, wrapping, and emoji / markdown styling. -2. **Move towards width‑agnostic streaming cells.** - - Introduce a dedicated streaming history cell that stores the raw markdown - buffer and lets `HistoryCell::display_lines(width)` perform both markdown - rendering and wrapping based on the current viewport width. 
- - Keep the commit animation logic expressed in terms of “logical” positions - (e.g., number of tokens or lines committed) rather than pre‑wrapped visual - lines at a fixed width. -3. **Hybrid “visual line count” model.** - - Track committed visual lines as a scalar and re‑render the streamed prefix - at the current width, revealing only the first `N` visual lines on each - animation tick. - -TUI2 does not yet implement these refactors; it intentionally stays close to -the legacy behavior while the viewport work (scrolling, selection, exit -transcripts) is being ported. This document exists to make that trade‑off -explicit for TUI2 and to provide a natural home for any TUI2‑specific streaming -wrapping notes as the design evolves. +# Streaming Wrapping Reflow (tui2) +This document describes a correctness bug in `codex-rs/tui2` and the chosen fix: +while streaming assistant markdown, soft-wrap decisions were effectively persisted as hard line +breaks, so resizing the viewport could not reflow prose. + +## Goal + +- Resizing the viewport reflows transcript prose (including streaming assistant output). +- Width-derived breaks are always treated as *soft wraps* (not logical newlines). +- Copy/paste continues to treat soft wraps as joinable (via joiners), and hard breaks as newlines. + +Non-goals: + +- Reflowing terminal scrollback that has already been printed. +- Reflowing content that is intentionally treated as preformatted (e.g., code blocks, raw stdout). + +## Background: where reflow happens in tui2 + +TUI2 renders the transcript as a list of `HistoryCell`s: + +1. A cell stores width-agnostic content (string, diff, logical lines, etc.). +2. At draw time (and on resize), `transcript_render` asks each cell for lines at the *current* + width (ideally via `HistoryCell::transcript_lines_with_joiners(width)`). +3. `TranscriptViewCache` caches the wrapped visual lines keyed by width; a width change triggers a + rebuild. 
+ +This only works if cells do *not* persist width-derived wrapping inside their stored state. + +## The bug: soft wraps became hard breaks during streaming + +Ratatui represents multi-line content as `Vec`. If we split a paragraph into multiple `Line`s +because the viewport is narrow, that split is indistinguishable from an explicit newline unless we +also carry metadata describing which breaks were “soft”. + +Streaming assistant output used to generate already-wrapped `Line`s and store them inside the +history cell. Later, when the viewport became wider, the transcript renderer could not “un-split” +those baked lines — they looked like hard breaks. + +## Chosen solution (A, F1): stream logical markdown lines; wrap in the cell at render-time + +User choice recap: + +- **A**: Keep append-only streaming (new history cell per commit tick), but make the streamed data + width-agnostic. +- **F1**: Make the agent message cell responsible for wrapping-to-width so transcript-level wrapping + can be a no-op for it. + +### Key idea: separate markdown parsing from wrapping + +We introduce a width-agnostic “logical markdown line” representation that preserves the metadata +needed to wrap correctly later: + +- `codex-rs/tui2/src/markdown_render.rs` + - `MarkdownLogicalLine { content, initial_indent, subsequent_indent, line_style, is_preformatted }` + - `render_markdown_logical_lines(input: &str) -> Vec` + +This keeps: + +- hard breaks (paragraph/list boundaries, explicit newlines), +- markdown indentation rules for wraps (list markers, nested lists, blockquotes), +- preformatted runs (code blocks) stable. + +### Updated streaming pipeline + +- `codex-rs/tui2/src/markdown_stream.rs` + - `MarkdownStreamCollector` is newline-gated (no change), but now commits + `Vec` instead of already-wrapped `Vec`. + - Width is removed from the collector; wrapping is not performed during streaming. 
+ +- `codex-rs/tui2/src/streaming/controller.rs` + - Emits `AgentMessageCell::new_logical(...)` containing logical lines. + +- `codex-rs/tui2/src/history_cell.rs` + - `AgentMessageCell` stores `Vec`. + - `HistoryCell::transcript_lines_with_joiners(width)` wraps each logical line at the current + width using `word_wrap_line_with_joiners` and composes indents as: + - transcript gutter prefix (`• ` / ` `), plus + - markdown-provided initial/subsequent indents. + - Preformatted logical lines are rendered without wrapping. + +Result: on resize, the transcript cache rebuilds against the new width and the agent output reflows +correctly because the stored content contains no baked soft wraps. + +## Overlay deferral fix (D): defer cells, not rendered lines + +When an overlay (transcript/static) is active, TUI2 is in alt screen and the normal terminal buffer +is not visible. Historically, `tui2` attempted to queue “history to print” for the normal buffer by +deferring *rendered lines*, which baked the then-current width. + +User choice recap: + +- **D**: Store deferred *cells* and render them at overlay close time. + +Implementation: + +- `codex-rs/tui2/src/app.rs` + - `deferred_history_cells: Vec>` (replaces `deferred_history_lines`). + - `AppEvent::InsertHistoryCell` pushes cells into the deferral list when `overlay.is_some()`. + +- `codex-rs/tui2/src/app_backtrack.rs` + - `close_transcript_overlay` renders deferred cells at the *current* width when closing the + overlay, then queues the resulting lines for the normal terminal buffer. + +Note: as of today, `Tui::insert_history_lines` queues lines but `Tui::draw` does not flush them into +the terminal (see `codex-rs/tui2/src/tui.rs`). This section is therefore best read as “behavior we +want when/if scrollback printing is re-enabled”, not a guarantee that content is printed during the +main TUI loop. For the current intended behavior around printing, see +`codex-rs/tui2/docs/tui_viewport_and_history.md`. 
+ +## Tests (G2) + +User choice recap: + +- **G2**: Add resize reflow tests + snapshot coverage. + +Added coverage: + +- `codex-rs/tui2/src/history_cell.rs` + - `agent_message_cell_reflows_streamed_prose_on_resize` + - `agent_message_cell_reflows_streamed_prose_vt100_snapshot` + +These assert that a streamed agent cell produces fewer visual lines at wider widths and provide +snapshots showing reflow for list items and blockquotes. + +## Audit: other `HistoryCell`s and width-baked paths + +This section answers “what else might behave like this?” up front. + +### History cells + +- `AgentMessageCell` (`codex-rs/tui2/src/history_cell.rs`): **was affected**; now stores logical + markdown lines and wraps at render time. +- `UserHistoryCell` (`codex-rs/tui2/src/history_cell.rs`): wraps at render time from stored `String` + using `word_wrap_lines_with_joiners` (reflowable). +- `ReasoningSummaryCell` (`codex-rs/tui2/src/history_cell.rs`): renders from stored `String` on each + call; it does call `append_markdown(..., Some(width))`, but that wrapping is recomputed per width + (reflowable). +- `PrefixedWrappedHistoryCell` (`codex-rs/tui2/src/history_cell.rs`): wraps at render time and + returns joiners (reflowable). +- `PlainHistoryCell` (`codex-rs/tui2/src/history_cell.rs`): stores `Vec` and returns it + unchanged (not reflowable by design; used for already-structured/preformatted output). + +Rule of thumb: any cell that stores already-wrapped `Vec` for prose is a candidate for the +same bug; cells that store source text or logical lines and compute wrapping inside +`display_lines(width)` are safe. + +### Width-baked output outside the transcript model + +Even with the streaming fix, some paths are inherently width-baked: + +- Printed transcript after exit (`codex-rs/tui2/src/app.rs`): `AppExitInfo.session_lines` is rendered + once using the final width and then printed; it cannot reflow afterward. 
+- Optional scrollback insertion helper (`codex-rs/tui2/src/insert_history.rs`): once ANSI is written + to the terminal, that output cannot be reflowed later. This helper is currently used for + deterministic ANSI emission (`write_spans`) and tests; it is not wired into the main TUI draw + loop. +- Static overlays (`codex-rs/tui2/src/pager_overlay.rs`): reflow depends on whether callers provided + width-agnostic input; pre-split `Vec` cannot be “un-split” within the overlay. + +## Deferred / follow-ups + +The fix above is sufficient to unblock correct reflow on resize. Remaining choices can be deferred: + +- Streaming granularity: one logical line can wrap into multiple visual lines, so “commit tick” + updates can appear in larger chunks than before. If this becomes a UX issue, we can add a render- + time “progressive reveal” layer without reintroducing width baking. +- Expand logical-line rendering to other markdown-ish cells if needed (e.g., unify `append_markdown` + usage), but only if we find a concrete reflow bug beyond `AgentMessageCell`. 
diff --git a/codex-rs/tui2/src/app.rs b/codex-rs/tui2/src/app.rs index 84d16415d74..ead4135a4d7 100644 --- a/codex-rs/tui2/src/app.rs +++ b/codex-rs/tui2/src/app.rs @@ -20,6 +20,11 @@ use crate::transcript_copy_action::TranscriptCopyAction; use crate::transcript_copy_action::TranscriptCopyFeedback; use crate::transcript_copy_ui::TranscriptCopyUi; use crate::transcript_multi_click::TranscriptMultiClick; +use crate::transcript_scrollbar::render_transcript_scrollbar_if_active; +use crate::transcript_scrollbar::split_transcript_area; +use crate::transcript_scrollbar_ui::TranscriptScrollbarMouseEvent; +use crate::transcript_scrollbar_ui::TranscriptScrollbarMouseHandling; +use crate::transcript_scrollbar_ui::TranscriptScrollbarUi; use crate::transcript_selection::TRANSCRIPT_GUTTER_COLS; use crate::transcript_selection::TranscriptSelection; use crate::transcript_selection::TranscriptSelectionPoint; @@ -35,7 +40,7 @@ use crate::tui::scrolling::TranscriptScroll; use crate::update_action::UpdateAction; use codex_ansi_escape::ansi_escape_line; use codex_core::AuthManager; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_core::config::edit::ConfigEditsBuilder; #[cfg(target_os = "windows")] @@ -51,7 +56,7 @@ use codex_core::protocol::SessionSource; use codex_core::protocol::SkillErrorInfo; use codex_core::protocol::TokenUsage; use codex_core::terminal::terminal_info; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; @@ -86,7 +91,7 @@ use crate::history_cell::UpdateAvailableHistoryCell; #[derive(Debug, Clone)] pub struct AppExitInfo { pub token_usage: TokenUsage, - pub conversation_id: Option, + pub conversation_id: Option, pub update_action: Option, /// ANSI-styled transcript lines to print after the TUI exits. 
/// @@ -100,7 +105,7 @@ impl From for codex_tui::AppExitInfo { fn from(info: AppExitInfo) -> Self { codex_tui::AppExitInfo { token_usage: info.token_usage, - conversation_id: info.conversation_id, + thread_id: info.conversation_id, update_action: info.update_action.map(Into::into), } } @@ -108,7 +113,7 @@ impl From for codex_tui::AppExitInfo { fn session_summary( token_usage: TokenUsage, - conversation_id: Option, + conversation_id: Option, ) -> Option { if token_usage.is_zero() { return None; @@ -315,7 +320,7 @@ async fn handle_model_migration_prompt_if_needed( } pub(crate) struct App { - pub(crate) server: Arc, + pub(crate) server: Arc, pub(crate) app_event_tx: AppEventSender, pub(crate) chat_widget: ChatWidget, pub(crate) auth_manager: Arc, @@ -337,11 +342,24 @@ pub(crate) struct App { transcript_total_lines: usize, transcript_copy_ui: TranscriptCopyUi, transcript_copy_action: TranscriptCopyAction, + transcript_scrollbar_ui: TranscriptScrollbarUi, - // Pager overlay state (Transcript or Static like Diff) + // Pager overlay state (Transcript or Static like Diff). pub(crate) overlay: Option, - pub(crate) deferred_history_lines: Vec>, - has_emitted_history_lines: bool, + /// History cells received while an overlay is active. + /// + /// While in an alt-screen overlay, the normal terminal buffer is not visible. + /// Instead we queue the incoming cells here and, on overlay close, render them at the *current* + /// width and queue them in one batch via `Tui::insert_history_lines`. + /// + /// This matters for correctness if/when scrollback printing is enabled: if we deferred + /// already-rendered `Vec`, we'd bake viewport-width wrapping based on the width at the + /// time the cell arrived (which may differ from the width when the overlay closes). + pub(crate) deferred_history_cells: Vec>, + /// True once at least one history cell has been inserted into terminal scrollback. 
+ /// + /// Used to decide whether to insert an extra blank separator line when flushing deferred cells. + pub(crate) has_emitted_history_lines: bool, pub(crate) enhanced_keys_supported: bool, @@ -369,7 +387,7 @@ impl App { if let Some(conversation_id) = self.chat_widget.conversation_id() { self.suppress_shutdown_complete = true; self.chat_widget.submit_op(Op::Shutdown); - self.server.remove_conversation(&conversation_id).await; + self.server.remove_thread(&conversation_id).await; } } @@ -389,11 +407,12 @@ impl App { let (app_event_tx, mut app_event_rx) = unbounded_channel(); let app_event_tx = AppEventSender::new(app_event_tx); - let conversation_manager = Arc::new(ConversationManager::new( + let thread_manager = Arc::new(ThreadManager::new( + config.codex_home.clone(), auth_manager.clone(), SessionSource::Cli, )); - let mut model = conversation_manager + let mut model = thread_manager .get_models_manager() .get_model(&config.model, &config) .await; @@ -402,7 +421,7 @@ impl App { &mut config, model.as_str(), &app_event_tx, - conversation_manager.get_models_manager(), + thread_manager.get_models_manager(), ) .await; if let Some(exit_info) = exit_info { @@ -423,20 +442,16 @@ impl App { initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), - models_manager: conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: feedback.clone(), is_first_run, model: model.clone(), }; - ChatWidget::new(init, conversation_manager.clone()) + ChatWidget::new(init, thread_manager.clone()) } ResumeSelection::Resume(path) => { - let resumed = conversation_manager - .resume_conversation_from_rollout( - config.clone(), - path.clone(), - auth_manager.clone(), - ) + let resumed = thread_manager + .resume_thread_from_rollout(config.clone(), path.clone(), auth_manager.clone()) .await .wrap_err_with(|| { format!("Failed to resume session from {}", path.display()) @@ -449,16 +464,12 @@ impl App { 
initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), - models_manager: conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: feedback.clone(), is_first_run, model: model.clone(), }; - ChatWidget::new_from_existing( - init, - resumed.conversation, - resumed.session_configured, - ) + ChatWidget::new_from_existing(init, resumed.thread, resumed.session_configured) } }; @@ -485,7 +496,7 @@ impl App { let copy_selection_shortcut = crate::transcript_copy_ui::detect_copy_selection_shortcut(); let mut app = Self { - server: conversation_manager.clone(), + server: thread_manager.clone(), app_event_tx, chat_widget, auth_manager: auth_manager.clone(), @@ -503,8 +514,9 @@ impl App { transcript_total_lines: 0, transcript_copy_ui: TranscriptCopyUi::new_with_shortcut(copy_selection_shortcut), transcript_copy_action: TranscriptCopyAction::default(), + transcript_scrollbar_ui: TranscriptScrollbarUi::default(), overlay: None, - deferred_history_lines: Vec::new(), + deferred_history_cells: Vec::new(), has_emitted_history_lines: false, commit_anim_running: Arc::new(AtomicBool::new(false)), scroll_config, @@ -708,18 +720,19 @@ impl App { return area.y; } - let transcript_area = Rect { + let transcript_full_area = Rect { x: area.x, y: area.y, width: area.width, height: max_transcript_height, }; + let (transcript_area, _) = split_transcript_area(transcript_full_area); self.transcript_view_cache .ensure_wrapped(cells, transcript_area.width); let total_lines = self.transcript_view_cache.lines().len(); if total_lines == 0 { - Clear.render_ref(transcript_area, frame.buffer); + Clear.render_ref(transcript_full_area, frame.buffer); self.transcript_scroll = TranscriptScroll::default(); self.transcript_view_top = 0; self.transcript_total_lines = 0; @@ -760,12 +773,14 @@ impl App { ); } - let transcript_area = Rect { + let transcript_full_area = Rect { x: area.x, y: area.y, width: area.width, 
height: transcript_visible_height, }; + let (transcript_area, transcript_scrollbar_area) = + split_transcript_area(transcript_full_area); // Cache a few viewports worth of rasterized rows so redraws during streaming can cheaply // copy already-rendered `Cell`s instead of re-running grapheme segmentation. @@ -806,6 +821,13 @@ impl App { } else { self.transcript_copy_ui.clear_affordance(); } + render_transcript_scrollbar_if_active( + frame.buffer, + transcript_scrollbar_area, + total_lines, + max_visible, + top_offset, + ); chat_top } @@ -854,21 +876,45 @@ impl App { return; } - let transcript_area = Rect { + let transcript_full_area = Rect { x: 0, y: 0, width, height: transcript_height, }; + let (transcript_area, transcript_scrollbar_area) = + split_transcript_area(transcript_full_area); let base_x = transcript_area.x.saturating_add(TRANSCRIPT_GUTTER_COLS); let max_x = transcript_area.right().saturating_sub(1); + if matches!( + self.transcript_scrollbar_ui + .handle_mouse_event(TranscriptScrollbarMouseEvent { + tui, + mouse_event, + transcript_area, + scrollbar_area: transcript_scrollbar_area, + transcript_cells: &self.transcript_cells, + transcript_view_cache: &mut self.transcript_view_cache, + transcript_scroll: &mut self.transcript_scroll, + transcript_view_top: &mut self.transcript_view_top, + transcript_total_lines: &mut self.transcript_total_lines, + mouse_scroll_state: &mut self.scroll_state, + }), + TranscriptScrollbarMouseHandling::Handled + ) { + return; + } + // Treat the transcript as the only interactive region for transcript selection. // // This prevents clicks in the composer/footer from starting or extending a transcript // selection, while still allowing a left-click outside the transcript to clear an // existing highlight. 
- if mouse_event.row < transcript_area.y || mouse_event.row >= transcript_area.bottom() { + if !self.transcript_scrollbar_ui.pointer_capture_active() + && (mouse_event.row < transcript_full_area.y + || mouse_event.row >= transcript_full_area.bottom()) + { if matches!( mouse_event.kind, MouseEventKind::Down(MouseButton::Left) | MouseEventKind::Up(MouseButton::Left) @@ -1082,7 +1128,15 @@ impl App { return None; } - Some((transcript_height as usize, width)) + let transcript_full_area = Rect { + x: 0, + y: 0, + width, + height: transcript_height, + }; + let (transcript_area, _) = split_transcript_area(transcript_full_area); + + Some((transcript_height as usize, transcript_area.width)) } /// Scroll the transcript by a number of visual lines. @@ -1339,7 +1393,7 @@ impl App { ); match self .server - .resume_conversation_from_rollout( + .resume_thread_from_rollout( self.config.clone(), path.clone(), self.auth_manager.clone(), @@ -1363,7 +1417,7 @@ impl App { }; self.chat_widget = ChatWidget::new_from_existing( init, - resumed.conversation, + resumed.thread, resumed.session_configured, ); if let Some(summary) = summary { @@ -1400,21 +1454,8 @@ impl App { tui.frame_requester().schedule_frame(); } self.transcript_cells.push(cell.clone()); - let mut display = cell.display_lines(tui.terminal.last_known_screen_size.width); - if !display.is_empty() { - // Only insert a separating blank line for new cells that are not - // part of an ongoing stream. Streaming continuations should not - // accrue extra blank lines between chunks. 
- if !cell.is_stream_continuation() { - if self.has_emitted_history_lines { - display.insert(0, Line::from("")); - } else { - self.has_emitted_history_lines = true; - } - } - if self.overlay.is_some() { - self.deferred_history_lines.extend(display); - } + if self.overlay.is_some() { + self.deferred_history_cells.push(cell); } } AppEvent::StartCommitAnimation => { @@ -2039,24 +2080,27 @@ mod tests { use crate::tui::scrolling::TranscriptLineMeta; use codex_core::AuthManager; use codex_core::CodexAuth; - use codex_core::ConversationManager; + use codex_core::ThreadManager; + use codex_core::config::ConfigBuilder; use codex_core::protocol::AskForApproval; use codex_core::protocol::Event; use codex_core::protocol::EventMsg; use codex_core::protocol::SandboxPolicy; use codex_core::protocol::SessionConfiguredEvent; - use codex_protocol::ConversationId; + use codex_protocol::ThreadId; + use insta::assert_snapshot; use pretty_assertions::assert_eq; use ratatui::prelude::Line; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; + use tempfile::tempdir; async fn make_test_app() -> App { let (chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender().await; let config = chat_widget.config_ref().clone(); let current_model = "gpt-5.2-codex".to_string(); - let server = Arc::new(ConversationManager::with_models_provider( + let server = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), )); @@ -2084,8 +2128,9 @@ mod tests { CopySelectionShortcut::CtrlShiftC, ), transcript_copy_action: TranscriptCopyAction::default(), + transcript_scrollbar_ui: TranscriptScrollbarUi::default(), overlay: None, - deferred_history_lines: Vec::new(), + deferred_history_cells: Vec::new(), has_emitted_history_lines: false, enhanced_keys_supported: false, commit_anim_running: Arc::new(AtomicBool::new(false)), @@ -2107,7 +2152,7 @@ mod tests { let (chat_widget, app_event_tx, rx, op_rx) = 
make_chatwidget_manual_with_sender().await; let config = chat_widget.config_ref().clone(); let current_model = "gpt-5.2-codex".to_string(); - let server = Arc::new(ConversationManager::with_models_provider( + let server = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("Test API Key"), config.model_provider.clone(), )); @@ -2136,8 +2181,9 @@ mod tests { CopySelectionShortcut::CtrlShiftC, ), transcript_copy_action: TranscriptCopyAction::default(), + transcript_scrollbar_ui: TranscriptScrollbarUi::default(), overlay: None, - deferred_history_lines: Vec::new(), + deferred_history_cells: Vec::new(), has_emitted_history_lines: false, enhanced_keys_supported: false, commit_anim_running: Arc::new(AtomicBool::new(false)), @@ -2158,6 +2204,24 @@ mod tests { codex_core::models_manager::model_presets::all_model_presets().clone() } + fn model_migration_copy_to_plain_text( + copy: &crate::model_migration::ModelMigrationCopy, + ) -> String { + let mut s = String::new(); + for span in ©.heading { + s.push_str(&span.content); + } + s.push('\n'); + s.push('\n'); + for line in ©.content { + for span in &line.spans { + s.push_str(&span.content); + } + s.push('\n'); + } + s + } + #[tokio::test] async fn model_migration_prompt_only_shows_for_deprecated_models() { let seen = BTreeMap::new(); @@ -2193,6 +2257,59 @@ mod tests { )); } + #[tokio::test] + async fn model_migration_prompt_shows_for_hidden_model() { + let codex_home = tempdir().expect("temp codex home"); + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("config"); + + let available_models = all_model_presets(); + let current = available_models + .iter() + .find(|preset| preset.model == "gpt-5.1-codex") + .cloned() + .expect("gpt-5.1-codex preset present"); + assert!( + !current.show_in_picker, + "expected gpt-5.1-codex to be hidden from picker for this test" + ); + + let upgrade = current.upgrade.as_ref().expect("upgrade configured"); + 
assert!( + should_show_model_migration_prompt( + ¤t.model, + &upgrade.id, + &config.notices.model_migrations, + &available_models, + ), + "expected migration prompt to be eligible for hidden model" + ); + + let target = available_models + .iter() + .find(|preset| preset.model == upgrade.id) + .cloned() + .expect("upgrade target present"); + let target_description = + (!target.description.is_empty()).then(|| target.description.clone()); + let can_opt_out = true; + let copy = migration_copy_for_models( + ¤t.model, + &upgrade.id, + target.display_name, + target_description, + can_opt_out, + ); + + assert_snapshot!( + "model_migration_prompt_shows_for_hidden_model", + model_migration_copy_to_plain_text(©) + ); + } + #[tokio::test] async fn transcript_selection_copy_includes_offscreen_lines() { let mut app = make_test_app().await; @@ -2280,7 +2397,7 @@ mod tests { let make_header = |is_first| { let event = SessionConfiguredEvent { - session_id: ConversationId::new(), + session_id: ThreadId::new(), model: "gpt-test".to_string(), model_provider_id: "test-provider".to_string(), approval_policy: AskForApproval::Never, @@ -2318,7 +2435,7 @@ mod tests { assert_eq!(user_count(&app.transcript_cells), 2); - app.backtrack.base_id = Some(ConversationId::new()); + app.backtrack.base_id = Some(ThreadId::new()); app.backtrack.primed = true; app.backtrack.nth_user_message = user_count(&app.transcript_cells).saturating_sub(1); @@ -2573,7 +2690,7 @@ mod tests { async fn new_session_requests_shutdown_for_previous_conversation() { let (mut app, mut app_event_rx, mut op_rx) = make_test_app_with_channels().await; - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let event = SessionConfiguredEvent { session_id: conversation_id, model: "gpt-test".to_string(), @@ -2639,8 +2756,7 @@ mod tests { total_tokens: 12, ..Default::default() }; - let conversation = - ConversationId::from_string("123e4567-e89b-12d3-a456-426614174000").unwrap(); + let conversation = 
ThreadId::from_string("123e4567-e89b-12d3-a456-426614174000").unwrap(); let summary = session_summary(usage, Some(conversation)).expect("summary"); assert_eq!( diff --git a/codex-rs/tui2/src/app_backtrack.rs b/codex-rs/tui2/src/app_backtrack.rs index ce5dff2ed85..c5c2f0e95f8 100644 --- a/codex-rs/tui2/src/app_backtrack.rs +++ b/codex-rs/tui2/src/app_backtrack.rs @@ -9,7 +9,7 @@ use crate::pager_overlay::Overlay; use crate::tui; use crate::tui::TuiEvent; use codex_core::protocol::ConversationPathResponseEvent; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use color_eyre::eyre::Result; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; @@ -21,13 +21,13 @@ pub(crate) struct BacktrackState { /// True when Esc has primed backtrack mode in the main view. pub(crate) primed: bool, /// Session id of the base conversation to fork from. - pub(crate) base_id: Option, + pub(crate) base_id: Option, /// Index in the transcript of the last user message. pub(crate) nth_user_message: usize, /// True when the transcript overlay is showing a backtrack preview. pub(crate) overlay_preview_active: bool, /// Pending fork request: (base_id, nth_user_message, prefill). - pub(crate) pending: Option<(ConversationId, usize, String)>, + pub(crate) pending: Option<(ThreadId, usize, String)>, } impl App { @@ -99,7 +99,7 @@ impl App { pub(crate) fn request_backtrack( &mut self, prefill: String, - base_id: ConversationId, + base_id: ThreadId, nth_user_message: usize, ) { self.backtrack.pending = Some((base_id, nth_user_message, prefill)); @@ -123,12 +123,42 @@ impl App { } /// Close transcript overlay and restore normal UI. + /// + /// Any history emitted while the overlay was open is flushed to the normal-buffer queue here. + /// + /// Importantly, we defer *cells* (not rendered lines) so we can render them against the current + /// width on close and avoid baking width-derived wrapping based on an earlier viewport size. 
+ /// (This matters if/when scrollback printing is enabled; `Tui::insert_history_lines` currently + /// queues lines without printing them during the main draw loop.) pub(crate) fn close_transcript_overlay(&mut self, tui: &mut tui::Tui) { let _ = tui.leave_alt_screen(); let was_backtrack = self.backtrack.overlay_preview_active; - if !self.deferred_history_lines.is_empty() { - let lines = std::mem::take(&mut self.deferred_history_lines); - tui.insert_history_lines(lines); + if !self.deferred_history_cells.is_empty() { + let cells = std::mem::take(&mut self.deferred_history_cells); + let width = tui.terminal.last_known_screen_size.width; + let mut lines: Vec> = Vec::new(); + for cell in cells { + let mut display = cell.display_lines(width); + if display.is_empty() { + continue; + } + + // Only insert a separating blank line for new cells that are not part of an + // ongoing stream. Streaming continuations should not accrue extra blank lines + // between chunks. + if !cell.is_stream_continuation() { + if self.has_emitted_history_lines { + display.insert(0, ratatui::text::Line::from("")); + } else { + self.has_emitted_history_lines = true; + } + } + + lines.extend(display); + } + if !lines.is_empty() { + tui.insert_history_lines(lines); + } } self.overlay = None; self.backtrack.overlay_preview_active = false; @@ -278,7 +308,7 @@ impl App { } /// Handle a ConversationHistory response while a backtrack is pending. - /// If it matches the primed base session, fork and switch to the new conversation. + /// If it matches the primed base session, fork and switch to the new thread. pub(crate) async fn on_conversation_history_for_backtrack( &mut self, tui: &mut tui::Tui, @@ -294,7 +324,7 @@ impl App { Ok(()) } - /// Fork the conversation using provided history and switch UI/state accordingly. + /// Fork the thread using provided history and switch UI/state accordingly. 
async fn fork_and_switch_to_new_conversation( &mut self, tui: &mut tui::Tui, @@ -315,28 +345,26 @@ impl App { } } - /// Thin wrapper around ConversationManager::fork_conversation. + /// Thin wrapper around ThreadManager::fork_thread. async fn perform_fork( &self, path: PathBuf, nth_user_message: usize, cfg: codex_core::config::Config, - ) -> codex_core::error::Result { - self.server - .fork_conversation(nth_user_message, cfg, path) - .await + ) -> codex_core::error::Result { + self.server.fork_thread(nth_user_message, cfg, path).await } - /// Install a forked conversation into the ChatWidget and update UI to reflect selection. + /// Install a forked thread into the ChatWidget and update UI to reflect selection. fn install_forked_conversation( &mut self, tui: &mut tui::Tui, cfg: codex_core::config::Config, - new_conv: codex_core::NewConversation, + new_conv: codex_core::NewThread, nth_user_message: usize, prefill: &str, ) { - let conv = new_conv.conversation; + let thread = new_conv.thread; let session_configured = new_conv.session_configured; let init = crate::chatwidget::ChatWidgetInit { config: cfg, @@ -352,7 +380,7 @@ impl App { is_first_run: false, }; self.chat_widget = - crate::chatwidget::ChatWidget::new_from_existing(init, conv, session_configured); + crate::chatwidget::ChatWidget::new_from_existing(init, thread, session_configured); // Trim transcript up to the selected user message and re-render it. 
self.trim_transcript_for_backtrack(nth_user_message); self.render_transcript_once(tui); diff --git a/codex-rs/tui2/src/bottom_pane/approval_overlay.rs b/codex-rs/tui2/src/bottom_pane/approval_overlay.rs index d42861eb1d5..0f0445fee83 100644 --- a/codex-rs/tui2/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui2/src/bottom_pane/approval_overlay.rs @@ -461,9 +461,13 @@ fn exec_options( .chain( proposed_execpolicy_amendment .filter(|_| features.enabled(Feature::ExecPolicy)) - .map(|prefix| { + .and_then(|prefix| { let rendered_prefix = strip_bash_lc_and_escape(prefix.command()); - ApprovalOption { + if rendered_prefix.contains('\n') || rendered_prefix.contains('\r') { + return None; + } + + Some(ApprovalOption { label: format!( "Yes, and don't ask again for commands that start with `{rendered_prefix}`" ), @@ -474,7 +478,7 @@ fn exec_options( ), display_shortcut: None, additional_shortcuts: vec![key_hint::plain(KeyCode::Char('p'))], - } + }) }), ) .chain([ApprovalOption { @@ -494,6 +498,12 @@ fn patch_options() -> Vec { display_shortcut: None, additional_shortcuts: vec![key_hint::plain(KeyCode::Char('y'))], }, + ApprovalOption { + label: "Yes, and don't ask again for these files".to_string(), + decision: ApprovalDecision::Review(ReviewDecision::ApprovedForSession), + display_shortcut: None, + additional_shortcuts: vec![key_hint::plain(KeyCode::Char('a'))], + }, ApprovalOption { label: "No, and tell Codex what to do differently".to_string(), decision: ApprovalDecision::Review(ReviewDecision::Abort), diff --git a/codex-rs/tui2/src/bottom_pane/chat_composer.rs b/codex-rs/tui2/src/bottom_pane/chat_composer.rs index 0073173fdc7..d1131000315 100644 --- a/codex-rs/tui2/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui2/src/bottom_pane/chat_composer.rs @@ -76,6 +76,7 @@ const LARGE_PASTE_CHAR_THRESHOLD: usize = 1000; pub enum InputResult { Submitted(String), Command(SlashCommand), + CommandWithArgs(SlashCommand, String), None, } @@ -112,6 +113,9 @@ pub(crate) struct 
ChatComposer { attached_images: Vec, placeholder_text: String, is_task_running: bool, + /// When false, the composer is temporarily read-only (e.g. during sandbox setup). + input_enabled: bool, + input_disabled_placeholder: Option, // Non-bracketed paste burst tracker. paste_burst: PasteBurst, // When true, disables paste-burst logic and inserts characters immediately. @@ -167,6 +171,8 @@ impl ChatComposer { attached_images: Vec::new(), placeholder_text, is_task_running: false, + input_enabled: true, + input_disabled_placeholder: None, paste_burst: PasteBurst::default(), disable_paste_burst: false, custom_prompts: Vec::new(), @@ -404,6 +410,10 @@ impl ChatComposer { /// Handle a key event coming from the main UI. pub fn handle_key_event(&mut self, key_event: KeyEvent) -> (InputResult, bool) { + if !self.input_enabled { + return (InputResult::None, false); + } + let result = match &mut self.active_popup { ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event), ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event), @@ -600,6 +610,42 @@ impl ChatComposer { if self.paste_burst.try_append_char_if_active(ch, now) { return (InputResult::None, true); } + // Non-ASCII input often comes from IMEs and can arrive in quick bursts. + // We do not want to hold the first char (flicker suppression) on this path, but we + // still want to detect paste-like bursts. Before applying any non-ASCII input, flush + // any existing burst buffer (including a pending first char from the ASCII path) so + // we don't carry that transient state forward. 
+ if let Some(pasted) = self.paste_burst.flush_before_modified_input() { + self.handle_paste(pasted); + } + if let Some(decision) = self.paste_burst.on_plain_char_no_hold(now) { + match decision { + CharDecision::BufferAppend => { + self.paste_burst.append_char_to_buffer(ch, now); + return (InputResult::None, true); + } + CharDecision::BeginBuffer { retro_chars } => { + let cur = self.textarea.cursor(); + let txt = self.textarea.text(); + let safe_cur = Self::clamp_to_char_boundary(txt, cur); + let before = &txt[..safe_cur]; + // If decision is to buffer, seed the paste burst buffer with the grabbed chars + new. + // Otherwise, fall through to normal insertion below. + if let Some(grab) = + self.paste_burst + .decide_begin_buffer(now, before, retro_chars as usize) + { + if !grab.grabbed.is_empty() { + self.textarea.replace_range(grab.start_byte..safe_cur, ""); + } + // seed the paste burst buffer with everything (grabbed + new) + self.paste_burst.append_char_to_buffer(ch, now); + return (InputResult::None, true); + } + } + _ => unreachable!("on_plain_char_no_hold returned unexpected variant"), + } + } } if let Some(pasted) = self.paste_burst.flush_before_modified_input() { self.handle_paste(pasted); @@ -1191,6 +1237,18 @@ impl ChatComposer { } } + if !input_starts_with_space + && let Some((name, rest)) = parse_slash_name(&text) + && !rest.is_empty() + && !name.contains('/') + && let Some((_n, cmd)) = built_in_slash_commands() + .into_iter() + .find(|(command_name, _)| *command_name == name) + && matches!(cmd, SlashCommand::Review | SlashCommand::Rename) + { + return (InputResult::CommandWithArgs(cmd, rest.to_string()), true); + } + let expanded_prompt = match expand_custom_prompt(&text, &self.custom_prompts) { Ok(expanded) => expanded, Err(err) => { @@ -1263,9 +1321,8 @@ impl ChatComposer { { let has_ctrl_or_alt = has_ctrl_or_alt(modifiers); if !has_ctrl_or_alt { - // Non-ASCII characters (e.g., from IMEs) can arrive in quick bursts and be - // misclassified by 
paste heuristics. Flush any active burst buffer and insert - // non-ASCII characters directly. + // Non-ASCII characters (e.g., from IMEs) can arrive in quick bursts, so avoid + // holding the first char while still allowing burst detection for paste input. if !ch.is_ascii() { return self.handle_non_ascii_char(input); } @@ -1287,7 +1344,6 @@ impl ChatComposer { if !grab.grabbed.is_empty() { self.textarea.replace_range(grab.start_byte..safe_cur, ""); } - self.paste_burst.begin_with_retro_grabbed(grab.grabbed, now); self.paste_burst.append_char_to_buffer(ch, now); return (InputResult::None, true); } @@ -1524,7 +1580,8 @@ impl ChatComposer { let toggles = matches!(key_event.code, KeyCode::Char('?')) && !has_ctrl_or_alt(key_event.modifiers) - && self.is_empty(); + && self.is_empty() + && !self.is_in_paste_burst(); if !toggles { return false; @@ -1806,6 +1863,17 @@ impl ChatComposer { self.has_focus = has_focus; } + #[allow(dead_code)] + pub(crate) fn set_input_enabled(&mut self, enabled: bool, placeholder: Option) { + self.input_enabled = enabled; + self.input_disabled_placeholder = if enabled { None } else { placeholder }; + + // Avoid leaving interactive popups open while input is blocked. 
+ if !enabled && !matches!(self.active_popup, ActivePopup::None) { + self.active_popup = ActivePopup::None; + } + } + pub fn set_task_running(&mut self, running: bool) { self.is_task_running = running; } @@ -1831,6 +1899,10 @@ impl ChatComposer { impl Renderable for ChatComposer { fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> { + if !self.input_enabled { + return None; + } + let [_, textarea_rect, _] = self.layout_areas(area); let state = *self.textarea_state.borrow(); self.textarea.cursor_pos_with_state(textarea_rect, state) @@ -1909,10 +1981,15 @@ impl Renderable for ChatComposer { let style = user_message_style(); Block::default().style(style).render_ref(composer_rect, buf); if !textarea_rect.is_empty() { + let prompt = if self.input_enabled { + "›".bold() + } else { + "›".dim() + }; buf.set_span( textarea_rect.x - LIVE_PREFIX_COLS, textarea_rect.y, - &"›".bold(), + &prompt, textarea_rect.width, ); } @@ -1920,7 +1997,15 @@ impl Renderable for ChatComposer { let mut state = self.textarea_state.borrow_mut(); StatefulWidgetRef::render_ref(&(&self.textarea), textarea_rect, buf, &mut state); if self.textarea.text().is_empty() { - let placeholder = Span::from(self.placeholder_text.as_str()).dim(); + let text = if self.input_enabled { + self.placeholder_text.as_str().to_string() + } else { + self.input_disabled_placeholder + .as_deref() + .unwrap_or("Input disabled.") + .to_string() + }; + let placeholder = Span::from(text).dim().italic(); Line::from(vec![placeholder]).render_ref(textarea_rect.inner(Margin::new(0, 0)), buf); } } @@ -2213,13 +2298,46 @@ mod tests { composer.handle_key_event(KeyEvent::new(KeyCode::Char('?'), KeyModifiers::NONE)); assert_eq!(result, InputResult::None); assert!(needs_redraw, "typing should still mark the view dirty"); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let _ = composer.flush_paste_burst_if_due(); + let _ = flush_after_paste_burst(&mut composer); assert_eq!(composer.textarea.text(), "h?"); 
assert_eq!(composer.footer_mode, FooterMode::ShortcutSummary); assert_eq!(composer.footer_mode(), FooterMode::ContextOnly); } + #[test] + fn question_mark_does_not_toggle_during_paste_burst() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + // Force an active paste burst so this test doesn't depend on tight timing. + composer + .paste_burst + .begin_with_retro_grabbed(String::new(), Instant::now()); + + for ch in ['h', 'i', '?', 't', 'h', 'e', 'r', 'e'] { + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE)); + } + assert!(composer.is_in_paste_burst()); + assert_eq!(composer.textarea.text(), ""); + + let flushed = flush_after_paste_burst(&mut composer); + assert!(flushed, "expected buffered text to flush after stop"); + + assert_eq!(composer.textarea.text(), "hi?there"); + assert_ne!(composer.footer_mode, FooterMode::ShortcutOverlay); + } + #[test] fn shortcut_overlay_persists_while_task_running() { use crossterm::event::KeyCode; @@ -2423,6 +2541,93 @@ mod tests { } } + #[test] + fn non_ascii_burst_handles_newline() { + let test_cases = [ + // triggers on windows + "天地玄黄 宇宙洪荒 +日月盈昃 辰宿列张 +寒来暑往 秋收冬藏 + +你好世界 编码测试 +汉字处理 UTF-8 +终端显示 正确无误 + +风吹竹林 月照大江 +白云千载 青山依旧 +程序员 与 Unicode 同行", + // Simulate pasting "你 好\nhi" with an ideographic space to trigger pastey heuristics. 
+ "你 好\nhi", + ]; + + for test_case in test_cases { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + for c in test_case.chars() { + let _ = + composer.handle_key_event(KeyEvent::new(KeyCode::Char(c), KeyModifiers::NONE)); + } + + assert!( + composer.textarea.text().is_empty(), + "non-empty textarea before flush: {test_case}", + ); + let _ = flush_after_paste_burst(&mut composer); + assert_eq!(composer.textarea.text(), test_case); + } + } + + #[test] + fn ascii_burst_treats_enter_as_newline() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + // Force an active burst so this test doesn't depend on tight timing. 
+ composer + .paste_burst + .begin_with_retro_grabbed(String::new(), Instant::now()); + + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('h'), KeyModifiers::NONE)); + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)); + + let (result, _) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + assert!( + matches!(result, InputResult::None), + "Enter during a burst should insert newline, not submit" + ); + + for ch in ['t', 'h', 'e', 'r', 'e'] { + let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE)); + } + + let _ = flush_after_paste_burst(&mut composer); + assert_eq!(composer.textarea.text(), "hi\nthere"); + } + #[test] fn handle_paste_small_inserts_text() { use crossterm::event::KeyCode; @@ -2725,6 +2930,11 @@ mod tests { } } + fn flush_after_paste_burst(composer: &mut ChatComposer) -> bool { + std::thread::sleep(PasteBurst::recommended_active_flush_delay()); + composer.flush_paste_burst_if_due() + } + #[test] fn slash_init_dispatches_command_and_does_not_submit_literal_text() { use crossterm::event::KeyCode; @@ -2754,6 +2964,9 @@ mod tests { InputResult::Command(cmd) => { assert_eq!(cmd.command(), "init"); } + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/init'") + } InputResult::Submitted(text) => { panic!("expected command dispatch, but composer submitted literal text: {text}") } @@ -2762,6 +2975,44 @@ mod tests { assert!(composer.textarea.is_empty(), "composer should be cleared"); } + #[test] + fn slash_review_with_args_dispatches_command_with_args() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + type_chars_humanlike(&mut composer, 
&['/', 'r', 'e', 'v', 'i', 'e', 'w', ' ']); + type_chars_humanlike(&mut composer, &['f', 'i', 'x', ' ', 't', 'h', 'i', 's']); + + let (result, _needs_redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + + match result { + InputResult::CommandWithArgs(cmd, args) => { + assert_eq!(cmd, SlashCommand::Review); + assert_eq!(args, "fix this"); + } + InputResult::Command(cmd) => { + panic!("expected args for '/review', got bare command: {cmd:?}") + } + InputResult::Submitted(text) => { + panic!("expected command dispatch, got literal submit: {text}") + } + InputResult::None => panic!("expected CommandWithArgs result for '/review'"), + } + assert!(composer.textarea.is_empty(), "composer should be cleared"); + } + #[test] fn extract_args_supports_quoted_paths_single_arg() { let args = extract_positional_args_for_prompt_line( @@ -2827,6 +3078,9 @@ mod tests { composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); match result { InputResult::Command(cmd) => assert_eq!(cmd.command(), "diff"), + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/diff'") + } InputResult::Submitted(text) => { panic!("expected command dispatch after Tab completion, got literal submit: {text}") } @@ -2860,6 +3114,9 @@ mod tests { InputResult::Command(cmd) => { assert_eq!(cmd.command(), "mention"); } + InputResult::CommandWithArgs(_, _) => { + panic!("expected command dispatch without args for '/mention'") + } InputResult::Submitted(text) => { panic!("expected command dispatch, but composer submitted literal text: {text}") } @@ -3905,8 +4162,7 @@ mod tests { composer.textarea.text().is_empty(), "text should remain empty until flush" ); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let flushed = composer.flush_paste_burst_if_due(); + let flushed = flush_after_paste_burst(&mut composer); assert!(flushed, "expected buffered text to flush after stop"); 
assert_eq!(composer.textarea.text(), "a".repeat(count)); assert!( @@ -3939,8 +4195,7 @@ mod tests { // Nothing should appear until we stop and flush assert!(composer.textarea.text().is_empty()); - std::thread::sleep(ChatComposer::recommended_paste_flush_delay()); - let flushed = composer.flush_paste_burst_if_due(); + let flushed = flush_after_paste_burst(&mut composer); assert!(flushed, "expected flush after stopping fast input"); let expected_placeholder = format!("[Pasted Content {count} chars]"); @@ -4048,4 +4303,38 @@ mod tests { "'/zzz' should not activate slash popup because it is not a prefix of any built-in command" ); } + #[test] + fn input_disabled_ignores_keypresses_and_hides_cursor() { + use crossterm::event::KeyCode; + use crossterm::event::KeyEvent; + use crossterm::event::KeyModifiers; + + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = ChatComposer::new( + true, + sender, + false, + "Ask Codex to do anything".to_string(), + false, + ); + + composer.set_text_content("hello".to_string()); + composer.set_input_enabled(false, Some("Input disabled for test.".to_string())); + + let (result, needs_redraw) = + composer.handle_key_event(KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); + + assert_eq!(result, InputResult::None); + assert!(!needs_redraw); + assert_eq!(composer.current_text(), "hello"); + + let area = Rect { + x: 0, + y: 0, + width: 40, + height: 5, + }; + assert_eq!(composer.cursor_pos(area), None); + } } diff --git a/codex-rs/tui2/src/bottom_pane/list_selection_view.rs b/codex-rs/tui2/src/bottom_pane/list_selection_view.rs index 46d6daac601..27c7dc4233e 100644 --- a/codex-rs/tui2/src/bottom_pane/list_selection_view.rs +++ b/codex-rs/tui2/src/bottom_pane/list_selection_view.rs @@ -13,6 +13,7 @@ use ratatui::widgets::Block; use ratatui::widgets::Paragraph; use ratatui::widgets::Widget; +use super::selection_popup_common::wrap_styled_line; use crate::app_event_sender::AppEventSender; use 
crate::key_hint::KeyBinding; use crate::render::Insets; @@ -49,6 +50,7 @@ pub(crate) struct SelectionItem { pub(crate) struct SelectionViewParams { pub title: Option, pub subtitle: Option, + pub footer_note: Option>, pub footer_hint: Option>, pub items: Vec, pub is_searchable: bool, @@ -62,6 +64,7 @@ impl Default for SelectionViewParams { Self { title: None, subtitle: None, + footer_note: None, footer_hint: None, items: Vec::new(), is_searchable: false, @@ -73,6 +76,7 @@ impl Default for SelectionViewParams { } pub(crate) struct ListSelectionView { + footer_note: Option>, footer_hint: Option>, items: Vec, state: ScrollState, @@ -100,6 +104,7 @@ impl ListSelectionView { ])); } let mut s = Self { + footer_note: params.footer_note, footer_hint: params.footer_hint, items: params.items, state: ScrollState::new(), @@ -391,6 +396,11 @@ impl Renderable for ListSelectionView { if self.is_searchable { height = height.saturating_add(1); } + if let Some(note) = &self.footer_note { + let note_width = width.saturating_sub(2); + let note_lines = wrap_styled_line(note, note_width); + height = height.saturating_add(note_lines.len() as u16); + } if self.footer_hint.is_some() { height = height.saturating_add(1); } @@ -402,11 +412,15 @@ impl Renderable for ListSelectionView { return; } - let [content_area, footer_area] = Layout::vertical([ - Constraint::Fill(1), - Constraint::Length(if self.footer_hint.is_some() { 1 } else { 0 }), - ]) - .areas(area); + let note_width = area.width.saturating_sub(2); + let note_lines = self + .footer_note + .as_ref() + .map(|note| wrap_styled_line(note, note_width)); + let note_height = note_lines.as_ref().map_or(0, |lines| lines.len() as u16); + let footer_rows = note_height + u16::from(self.footer_hint.is_some()); + let [content_area, footer_area] = + Layout::vertical([Constraint::Fill(1), Constraint::Length(footer_rows)]).areas(area); Block::default() .style(user_message_style()) @@ -474,14 +488,43 @@ impl Renderable for ListSelectionView { ); } - 
if let Some(hint) = &self.footer_hint { - let hint_area = Rect { - x: footer_area.x + 2, - y: footer_area.y, - width: footer_area.width.saturating_sub(2), - height: footer_area.height, - }; - hint.clone().dim().render(hint_area, buf); + if footer_area.height > 0 { + let [note_area, hint_area] = Layout::vertical([ + Constraint::Length(note_height), + Constraint::Length(if self.footer_hint.is_some() { 1 } else { 0 }), + ]) + .areas(footer_area); + + if let Some(lines) = note_lines { + let note_area = Rect { + x: note_area.x + 2, + y: note_area.y, + width: note_area.width.saturating_sub(2), + height: note_area.height, + }; + for (idx, line) in lines.iter().enumerate() { + if idx as u16 >= note_area.height { + break; + } + let line_area = Rect { + x: note_area.x, + y: note_area.y + idx as u16, + width: note_area.width, + height: 1, + }; + line.clone().render(line_area, buf); + } + } + + if let Some(hint) = &self.footer_hint { + let hint_area = Rect { + x: hint_area.x + 2, + y: hint_area.y, + width: hint_area.width.saturating_sub(2), + height: hint_area.height, + }; + hint.clone().dim().render(hint_area, buf); + } } } } @@ -568,6 +611,38 @@ mod tests { assert_snapshot!("list_selection_spacing_with_subtitle", render_lines(&view)); } + #[test] + fn snapshot_footer_note_wraps() { + let (tx_raw, _rx) = unbounded_channel::(); + let tx = AppEventSender::new(tx_raw); + let items = vec![SelectionItem { + name: "Read Only".to_string(), + description: Some("Codex can read files".to_string()), + is_current: true, + dismiss_on_select: true, + ..Default::default() + }]; + let footer_note = Line::from(vec![ + "Note: ".dim(), + "Use /setup-elevated-sandbox".cyan(), + " to allow network access.".dim(), + ]); + let view = ListSelectionView::new( + SelectionViewParams { + title: Some("Select Approval Mode".to_string()), + footer_note: Some(footer_note), + footer_hint: Some(standard_popup_hint_line()), + items, + ..Default::default() + }, + tx, + ); + assert_snapshot!( + 
"list_selection_footer_note_wraps", + render_lines_with_width(&view, 40) + ); + } + #[test] fn renders_search_query_line_when_enabled() { let (tx_raw, _rx) = unbounded_channel::(); diff --git a/codex-rs/tui2/src/bottom_pane/mod.rs b/codex-rs/tui2/src/bottom_pane/mod.rs index 2ebd0715e7d..4b6caf0d1aa 100644 --- a/codex-rs/tui2/src/bottom_pane/mod.rs +++ b/codex-rs/tui2/src/bottom_pane/mod.rs @@ -256,6 +256,16 @@ impl BottomPane { self.request_redraw(); } + #[allow(dead_code)] + pub(crate) fn set_composer_input_enabled( + &mut self, + enabled: bool, + placeholder: Option, + ) { + self.composer.set_input_enabled(enabled, placeholder); + self.request_redraw(); + } + pub(crate) fn clear_composer_for_ctrl_c(&mut self) { self.composer.clear_for_ctrl_c(); self.request_redraw(); diff --git a/codex-rs/tui2/src/bottom_pane/paste_burst.rs b/codex-rs/tui2/src/bottom_pane/paste_burst.rs index 49377cb21c5..96ed095b8f3 100644 --- a/codex-rs/tui2/src/bottom_pane/paste_burst.rs +++ b/codex-rs/tui2/src/bottom_pane/paste_burst.rs @@ -6,6 +6,12 @@ use std::time::Instant; const PASTE_BURST_MIN_CHARS: u16 = 3; const PASTE_BURST_CHAR_INTERVAL: Duration = Duration::from_millis(8); const PASTE_ENTER_SUPPRESS_WINDOW: Duration = Duration::from_millis(120); +// Slower paste burts have been observed in windows environments, but ideally +// we want to keep this low +#[cfg(not(windows))] +const PASTE_BURST_ACTIVE_IDLE_TIMEOUT: Duration = Duration::from_millis(8); +#[cfg(windows)] +const PASTE_BURST_ACTIVE_IDLE_TIMEOUT: Duration = Duration::from_millis(60); #[derive(Default)] pub(crate) struct PasteBurst { @@ -52,16 +58,14 @@ impl PasteBurst { PASTE_BURST_CHAR_INTERVAL + Duration::from_millis(1) } + #[cfg(test)] + pub(crate) fn recommended_active_flush_delay() -> Duration { + PASTE_BURST_ACTIVE_IDLE_TIMEOUT + Duration::from_millis(1) + } + /// Entry point: decide how to treat a plain char with current timing. 
pub fn on_plain_char(&mut self, ch: char, now: Instant) -> CharDecision { - match self.last_plain_char_time { - Some(prev) if now.duration_since(prev) <= PASTE_BURST_CHAR_INTERVAL => { - self.consecutive_plain_char_burst = - self.consecutive_plain_char_burst.saturating_add(1) - } - _ => self.consecutive_plain_char_burst = 1, - } - self.last_plain_char_time = Some(now); + self.note_plain_char(now); if self.active { self.burst_window_until = Some(now + PASTE_ENTER_SUPPRESS_WINDOW); @@ -92,6 +96,40 @@ impl PasteBurst { CharDecision::RetainFirstChar } + /// Like on_plain_char(), but never holds the first char. + /// + /// Used for non-ASCII input paths (e.g., IMEs) where holding a character can + /// feel like dropped input, while still allowing burst-based paste detection. + /// + /// Note: This method will only ever return BufferAppend or BeginBuffer. + pub fn on_plain_char_no_hold(&mut self, now: Instant) -> Option { + self.note_plain_char(now); + + if self.active { + self.burst_window_until = Some(now + PASTE_ENTER_SUPPRESS_WINDOW); + return Some(CharDecision::BufferAppend); + } + + if self.consecutive_plain_char_burst >= PASTE_BURST_MIN_CHARS { + return Some(CharDecision::BeginBuffer { + retro_chars: self.consecutive_plain_char_burst.saturating_sub(1), + }); + } + + None + } + + fn note_plain_char(&mut self, now: Instant) { + match self.last_plain_char_time { + Some(prev) if now.duration_since(prev) <= PASTE_BURST_CHAR_INTERVAL => { + self.consecutive_plain_char_burst = + self.consecutive_plain_char_burst.saturating_add(1) + } + _ => self.consecutive_plain_char_burst = 1, + } + self.last_plain_char_time = Some(now); + } + /// Flush the buffered burst if the inter-key timeout has elapsed. /// /// Returns Some(String) when either: @@ -102,9 +140,14 @@ impl PasteBurst { /// /// Returns None if the timeout has not elapsed or there is nothing to flush. 
pub fn flush_if_due(&mut self, now: Instant) -> FlushResult { + let timeout = if self.is_active_internal() { + PASTE_BURST_ACTIVE_IDLE_TIMEOUT + } else { + PASTE_BURST_CHAR_INTERVAL + }; let timed_out = self .last_plain_char_time - .is_some_and(|t| now.duration_since(t) > PASTE_BURST_CHAR_INTERVAL); + .is_some_and(|t| now.duration_since(t) > timeout); if timed_out && self.is_active_internal() { self.active = false; let out = std::mem::take(&mut self.buffer); diff --git a/codex-rs/tui2/src/bottom_pane/selection_popup_common.rs b/codex-rs/tui2/src/bottom_pane/selection_popup_common.rs index 926cd4f3068..b340e18883f 100644 --- a/codex-rs/tui2/src/bottom_pane/selection_popup_common.rs +++ b/codex-rs/tui2/src/bottom_pane/selection_popup_common.rs @@ -24,6 +24,17 @@ pub(crate) struct GenericDisplayRow { pub wrap_indent: Option, // optional indent for wrapped lines } +pub(crate) fn wrap_styled_line<'a>(line: &'a Line<'a>, width: u16) -> Vec> { + use crate::wrapping::RtOptions; + use crate::wrapping::word_wrap_line; + + let width = width.max(1) as usize; + let opts = RtOptions::new(width) + .initial_indent(Line::from("")) + .subsequent_indent(Line::from("")); + word_wrap_line(line, opts) +} + fn line_width(line: &Line<'_>) -> usize { line.iter() .map(|span| UnicodeWidthStr::width(span.content.as_ref())) diff --git a/codex-rs/tui2/src/bottom_pane/snapshots/codex_tui2__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap b/codex-rs/tui2/src/bottom_pane/snapshots/codex_tui2__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap new file mode 100644 index 00000000000..18d0eb2483b --- /dev/null +++ b/codex-rs/tui2/src/bottom_pane/snapshots/codex_tui2__bottom_pane__list_selection_view__tests__list_selection_footer_note_wraps.snap @@ -0,0 +1,14 @@ +--- +source: tui2/src/bottom_pane/list_selection_view.rs +assertion_line: 640 +expression: "render_lines_with_width(&view, 40)" +--- + + Select Approval Mode + +› 1. 
Read Only (current) Codex can + read files + + Note: Use /setup-elevated-sandbox to + allow network access. + Press enter to confirm or esc to go ba diff --git a/codex-rs/tui2/src/bottom_pane/textarea.rs b/codex-rs/tui2/src/bottom_pane/textarea.rs index 2fd415c7f65..4fc673a11de 100644 --- a/codex-rs/tui2/src/bottom_pane/textarea.rs +++ b/codex-rs/tui2/src/bottom_pane/textarea.rs @@ -63,9 +63,10 @@ impl TextArea { pub fn set_text(&mut self, text: &str) { self.text = text.to_string(); self.cursor_pos = self.cursor_pos.clamp(0, self.text.len()); + self.elements.clear(); + self.cursor_pos = self.clamp_pos_to_nearest_boundary(self.cursor_pos); self.wrap_cache.replace(None); self.preferred_col = None; - self.elements.clear(); self.kill_buffer.clear(); } @@ -735,18 +736,36 @@ impl TextArea { .position(|e| pos > e.range.start && pos < e.range.end) } - fn clamp_pos_to_nearest_boundary(&self, mut pos: usize) -> usize { - if pos > self.text.len() { - pos = self.text.len(); + fn clamp_pos_to_char_boundary(&self, pos: usize) -> usize { + let pos = pos.min(self.text.len()); + if self.text.is_char_boundary(pos) { + return pos; + } + let mut prev = pos; + while prev > 0 && !self.text.is_char_boundary(prev) { + prev -= 1; + } + let mut next = pos; + while next < self.text.len() && !self.text.is_char_boundary(next) { + next += 1; + } + if pos.saturating_sub(prev) <= next.saturating_sub(pos) { + prev + } else { + next } + } + + fn clamp_pos_to_nearest_boundary(&self, pos: usize) -> usize { + let pos = self.clamp_pos_to_char_boundary(pos); if let Some(idx) = self.find_element_containing(pos) { let e = &self.elements[idx]; let dist_start = pos.saturating_sub(e.range.start); let dist_end = e.range.end.saturating_sub(pos); if dist_start <= dist_end { - e.range.start + self.clamp_pos_to_char_boundary(e.range.start) } else { - e.range.end + self.clamp_pos_to_char_boundary(e.range.end) } } else { pos @@ -754,6 +773,7 @@ impl TextArea { } fn clamp_pos_for_insertion(&self, pos: usize) -> 
usize { + let pos = self.clamp_pos_to_char_boundary(pos); // Do not allow inserting into the middle of an element if let Some(idx) = self.find_element_containing(pos) { let e = &self.elements[idx]; @@ -761,9 +781,9 @@ impl TextArea { let dist_start = pos.saturating_sub(e.range.start); let dist_end = e.range.end.saturating_sub(pos); if dist_start <= dist_end { - e.range.start + self.clamp_pos_to_char_boundary(e.range.start) } else { - e.range.end + self.clamp_pos_to_char_boundary(e.range.end) } } else { pos @@ -1041,6 +1061,7 @@ impl TextArea { mod tests { use super::*; // crossterm types are intentionally not imported here to avoid unused warnings + use pretty_assertions::assert_eq; use rand::prelude::*; fn rand_grapheme(rng: &mut rand::rngs::StdRng) -> String { @@ -1133,6 +1154,27 @@ mod tests { assert_eq!(t.cursor(), 5); } + #[test] + fn insert_str_at_clamps_to_char_boundary() { + let mut t = TextArea::new(); + t.insert_str("你"); + t.set_cursor(0); + t.insert_str_at(1, "A"); + assert_eq!(t.text(), "A你"); + assert_eq!(t.cursor(), 1); + } + + #[test] + fn set_text_clamps_cursor_to_char_boundary() { + let mut t = TextArea::new(); + t.insert_str("abcd"); + t.set_cursor(1); + t.set_text("你"); + assert_eq!(t.cursor(), 0); + t.insert_str("a"); + assert_eq!(t.text(), "a你"); + } + #[test] fn delete_backward_and_forward_edges() { let mut t = ta_with("abc"); diff --git a/codex-rs/tui2/src/chatwidget.rs b/codex-rs/tui2/src/chatwidget.rs index d92cf602140..fb474b7b514 100644 --- a/codex-rs/tui2/src/chatwidget.rs +++ b/codex-rs/tui2/src/chatwidget.rs @@ -61,7 +61,7 @@ use codex_core::protocol::WarningEvent; use codex_core::protocol::WebSearchBeginEvent; use codex_core::protocol::WebSearchEndEvent; use codex_core::skills::model::SkillMetadata; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::approvals::ElicitationRequestEvent; use codex_protocol::parse_command::ParsedCommand; @@ -131,7 +131,7 @@ use 
codex_common::approval_presets::ApprovalPreset; use codex_common::approval_presets::builtin_approval_presets; use codex_core::AuthManager; use codex_core::CodexAuth; -use codex_core::ConversationManager; +use codex_core::ThreadManager; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; use codex_file_search::FileMatch; @@ -311,7 +311,7 @@ pub(crate) struct ChatWidget { current_status_header: String, // Previous status header to restore after a transient stream retry. retry_status_header: Option, - conversation_id: Option, + conversation_id: Option, frame_requester: FrameRequester, // Whether to include the initial welcome banner on session configured show_welcome_banner: bool, @@ -1023,9 +1023,9 @@ impl ChatWidget { self.needs_final_message_separator = false; needs_redraw = true; } - self.stream_controller = Some(StreamController::new( - self.last_rendered_width.get().map(|w| w.saturating_sub(2)), - )); + // Streaming must not capture the current viewport width: width-derived wraps are + // applied later, at render time, so the transcript can reflow on resize. 
+ self.stream_controller = Some(StreamController::new()); } if let Some(controller) = self.stream_controller.as_mut() && controller.push(&delta) @@ -1264,10 +1264,7 @@ impl ChatWidget { } } - pub(crate) fn new( - common: ChatWidgetInit, - conversation_manager: Arc, - ) -> Self { + pub(crate) fn new(common: ChatWidgetInit, thread_manager: Arc) -> Self { let ChatWidgetInit { config, frame_requester, @@ -1285,7 +1282,7 @@ impl ChatWidget { config.model = Some(model.clone()); let mut rng = rand::rng(); let placeholder = EXAMPLE_PROMPTS[rng.random_range(0..EXAMPLE_PROMPTS.len())].to_string(); - let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), conversation_manager); + let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), thread_manager); let mut widget = Self { app_event_tx: app_event_tx.clone(), @@ -1349,7 +1346,7 @@ impl ChatWidget { /// Create a ChatWidget attached to an existing conversation (e.g., a fork). pub(crate) fn new_from_existing( common: ChatWidgetInit, - conversation: std::sync::Arc, + conversation: std::sync::Arc, session_configured: codex_core::protocol::SessionConfiguredEvent, ) -> Self { let ChatWidgetInit { @@ -1500,6 +1497,9 @@ impl ChatWidget { InputResult::Command(cmd) => { self.dispatch_command(cmd); } + InputResult::CommandWithArgs(cmd, args) => { + self.dispatch_command_with_args(cmd, args); + } InputResult::None => {} } } @@ -1545,6 +1545,10 @@ impl ChatWidget { SlashCommand::Resume => { self.app_event_tx.send(AppEvent::OpenResumePicker); } + SlashCommand::Rename => { + self.show_rename_prompt(); + self.request_redraw(); + } SlashCommand::Init => { let init_target = self.config.cwd.join(DEFAULT_PROJECT_DOC_FILENAME); if init_target.exists() { @@ -1665,6 +1669,56 @@ impl ChatWidget { } } + fn dispatch_command_with_args(&mut self, cmd: SlashCommand, args: String) { + if !cmd.available_during_task() && self.bottom_pane.is_task_running() { + let message = format!( + "'/{}' is disabled while a task is in progress.", + cmd.command() + ); + self.add_to_history(history_cell::new_error_event(message)); + self.request_redraw(); + return; + } + + let trimmed = args.trim(); + match cmd { + SlashCommand::Rename if !trimmed.is_empty() => { + let name = trimmed.to_string(); + self.add_info_message(format!("Session renamed to \"{name}\""), None); + self.app_event_tx + .send(AppEvent::CodexOp(Op::SetSessionName { name })); + } + SlashCommand::Review if !trimmed.is_empty() => { + self.submit_op(Op::Review { + review_request: ReviewRequest { + target: ReviewTarget::Custom { + instructions: trimmed.to_string(), + }, + user_facing_hint: None, + }, + }); + } + _ => self.dispatch_command(cmd), + } + } + + fn show_rename_prompt(&mut self) { + let tx = self.app_event_tx.clone(); + let view = CustomPromptView::new( + "Rename session".to_string(), + "Type a new name and press Enter".to_string(), + None, + Box::new(move |name: String| { + tx.send(AppEvent::InsertHistoryCell(Box::new( + history_cell::new_info_event(format!("Session renamed to \"{name}\""), None), + ))); + tx.send(AppEvent::CodexOp(Op::SetSessionName { name })); + }), + ); + + self.bottom_pane.show_view(Box::new(view)); + } + pub(crate) fn handle_paste(&mut self, text: String) { self.bottom_pane.handle_paste(text); } @@ -1761,7 +1832,10 @@ impl ChatWidget { } self.codex_op_tx - .send(Op::UserInput { items }) + .send(Op::UserInput { + items, + final_output_json_schema: None, + }) .unwrap_or_else(|e| { tracing::error!("failed to send message:
{e}"); }); @@ -1918,6 +1992,7 @@ impl ChatWidget { EventMsg::ExitedReviewMode(review) => self.on_exited_review_mode(review), EventMsg::ContextCompacted(_) => self.on_agent_message("Context compacted".to_owned()), EventMsg::RawResponseItem(_) + | EventMsg::ThreadRolledBack(_) | EventMsg::ItemStarted(_) | EventMsg::ItemCompleted(_) | EventMsg::AgentMessageContentDelta(_) @@ -2102,7 +2177,7 @@ impl ChatWidget { let models = self.models_manager.try_list_models(&self.config).ok()?; models .iter() - .find(|preset| preset.model == NUDGE_MODEL_SLUG) + .find(|preset| preset.show_in_picker && preset.model == NUDGE_MODEL_SLUG) .cloned() } @@ -2205,19 +2280,24 @@ impl ChatWidget { /// Open a popup to choose a quick auto model. Selecting "All models" /// opens the full picker with every available preset. pub(crate) fn open_model_popup(&mut self) { - let presets: Vec = - // todo(aibrahim): make this async function - match self.models_manager.try_list_models(&self.config) { - Ok(models) => models, - Err(_) => { - self.add_info_message( - "Models are being updated; please try /model again in a moment." 
- .to_string(), - None, - ); - return; - } - }; + let presets: Vec = match self.models_manager.try_list_models(&self.config) { + Ok(models) => models, + Err(_) => { + self.add_info_message( + "Models are being updated; please try /model again in a moment.".to_string(), + None, + ); + return; + } + }; + self.open_model_popup_with_presets(presets); + } + + pub(crate) fn open_model_popup_with_presets(&mut self, presets: Vec) { + let presets: Vec = presets + .into_iter() + .filter(|preset| preset.show_in_picker) + .collect(); let current_label = presets .iter() @@ -3303,7 +3383,7 @@ impl ChatWidget { .unwrap_or_default() } - pub(crate) fn conversation_id(&self) -> Option { + pub(crate) fn conversation_id(&self) -> Option { self.conversation_id } diff --git a/codex-rs/tui2/src/chatwidget/agent.rs b/codex-rs/tui2/src/chatwidget/agent.rs index 240972347fb..0e6fa2712b4 100644 --- a/codex-rs/tui2/src/chatwidget/agent.rs +++ b/codex-rs/tui2/src/chatwidget/agent.rs @@ -1,8 +1,8 @@ use std::sync::Arc; -use codex_core::CodexConversation; -use codex_core::ConversationManager; -use codex_core::NewConversation; +use codex_core::CodexThread; +use codex_core::NewThread; +use codex_core::ThreadManager; use codex_core::config::Config; use codex_core::protocol::Event; use codex_core::protocol::EventMsg; @@ -18,17 +18,17 @@ use crate::app_event_sender::AppEventSender; pub(crate) fn spawn_agent( config: Config, app_event_tx: AppEventSender, - server: Arc, + server: Arc, ) -> UnboundedSender { let (codex_op_tx, mut codex_op_rx) = unbounded_channel::(); let app_event_tx_clone = app_event_tx; tokio::spawn(async move { - let NewConversation { - conversation_id: _, - conversation, + let NewThread { + thread, session_configured, - } = match server.new_conversation(config).await { + thread_id: _, + } = match server.start_thread(config).await { Ok(v) => v, #[allow(clippy::print_stderr)] Err(err) => { @@ -52,17 +52,17 @@ pub(crate) fn spawn_agent( }; 
app_event_tx_clone.send(AppEvent::CodexEvent(ev)); - let conversation_clone = conversation.clone(); + let thread_clone = thread.clone(); tokio::spawn(async move { while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; + let id = thread_clone.submit(op).await; if let Err(e) = id { tracing::error!("failed to submit op: {e}"); } } }); - while let Ok(event) = conversation.next_event().await { + while let Ok(event) = thread.next_event().await { app_event_tx_clone.send(AppEvent::CodexEvent(event)); } }); @@ -70,11 +70,11 @@ pub(crate) fn spawn_agent( codex_op_tx } -/// Spawn agent loops for an existing conversation (e.g., a forked conversation). +/// Spawn agent loops for an existing thread (e.g., a forked thread). /// Sends the provided `SessionConfiguredEvent` immediately, then forwards subsequent /// events and accepts Ops for submission. pub(crate) fn spawn_agent_from_existing( - conversation: std::sync::Arc, + thread: std::sync::Arc, session_configured: codex_core::protocol::SessionConfiguredEvent, app_event_tx: AppEventSender, ) -> UnboundedSender { @@ -89,17 +89,17 @@ pub(crate) fn spawn_agent_from_existing( }; app_event_tx_clone.send(AppEvent::CodexEvent(ev)); - let conversation_clone = conversation.clone(); + let thread_clone = thread.clone(); tokio::spawn(async move { while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; + let id = thread_clone.submit(op).await; if let Err(e) = id { tracing::error!("failed to submit op: {e}"); } } }); - while let Ok(event) = conversation.next_event().await { + while let Ok(event) = thread.next_event().await { app_event_tx_clone.send(AppEvent::CodexEvent(event)); } }); diff --git a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap new file mode 
100644 index 00000000000..962a38ebde6 --- /dev/null +++ b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_exec_multiline_prefix_no_execpolicy.snap @@ -0,0 +1,16 @@ +--- +source: tui2/src/chatwidget/tests.rs +expression: contents +--- + + + Would you like to run the following command? + + $ python - <<'PY' + print('hello') + PY + +› 1. Yes, proceed (y) + 2. No, and tell Codex what to do differently (esc) + + Press enter to confirm or esc to cancel diff --git a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_patch.snap b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_patch.snap index a5bfd136b78..c2ec1675066 100644 --- a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_patch.snap +++ b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__approval_modal_patch.snap @@ -2,6 +2,8 @@ source: tui2/src/chatwidget/tests.rs expression: terminal.backend().vt100().screen().contents() --- + + Would you like to make the following edits? Reason: The model wants to apply changes @@ -12,6 +14,7 @@ expression: terminal.backend().vt100().screen().contents() 2 +world › 1. Yes, proceed (y) - 2. No, and tell Codex what to do differently (esc) + 2. Yes, and don't ask again for these files (a) + 3. 
No, and tell Codex what to do differently (esc) Press enter to confirm or esc to cancel diff --git a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__model_picker_filters_hidden_models.snap b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__model_picker_filters_hidden_models.snap new file mode 100644 index 00000000000..a03d434905f --- /dev/null +++ b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui2__chatwidget__tests__model_picker_filters_hidden_models.snap @@ -0,0 +1,11 @@ +--- +source: tui2/src/chatwidget/tests.rs +assertion_line: 1758 +expression: popup +--- + Select Model and Effort + Access legacy models by running codex -m or in your config.toml + +› 1. test-visible-model (current) test-visible-model description + + Press enter to select reasoning effort, or esc to dismiss. diff --git a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap index ed18675ac39..e394605dcc5 100644 --- a/codex-rs/tui2/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap +++ b/codex-rs/tui2/src/chatwidget/snapshots/codex_tui__chatwidget__tests__approval_modal_patch.snap @@ -2,6 +2,8 @@ source: tui/src/chatwidget/tests.rs expression: terminal.backend().vt100().screen().contents() --- + + Would you like to make the following edits? Reason: The model wants to apply changes @@ -12,6 +14,7 @@ expression: terminal.backend().vt100().screen().contents() 2 +world › 1. Yes, proceed (y) - 2. No, and tell Codex what to do differently (esc) + 2. Yes, and don't ask again for these files (a) + 3. 
No, and tell Codex what to do differently (esc) Press enter to confirm or esc to cancel diff --git a/codex-rs/tui2/src/chatwidget/tests.rs b/codex-rs/tui2/src/chatwidget/tests.rs index 8b216812dfd..09f5073e75e 100644 --- a/codex-rs/tui2/src/chatwidget/tests.rs +++ b/codex-rs/tui2/src/chatwidget/tests.rs @@ -45,7 +45,7 @@ use codex_core::protocol::UndoCompletedEvent; use codex_core::protocol::UndoStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_core::protocol::WarningEvent; -use codex_protocol::ConversationId; +use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffortPreset; @@ -99,7 +99,7 @@ fn snapshot(percent: f64) -> RateLimitSnapshot { async fn resumed_initial_messages_render_history() { let (mut chat, mut rx, _ops) = make_chatwidget_manual(None).await; - let conversation_id = ConversationId::new(); + let conversation_id = ThreadId::new(); let rollout_file = NamedTempFile::new().unwrap(); let configured = codex_core::protocol::SessionConfiguredEvent { session_id: conversation_id, @@ -311,7 +311,7 @@ async fn helpers_are_available_and_do_not_panic() { let tx = AppEventSender::new(tx_raw); let cfg = test_config().await; let resolved_model = ModelsManager::get_model_offline(cfg.model.as_deref()); - let conversation_manager = Arc::new(ConversationManager::with_models_provider( + let thread_manager = Arc::new(ThreadManager::with_models_provider( CodexAuth::from_api_key("test"), cfg.model_provider.clone(), )); @@ -324,12 +324,12 @@ async fn helpers_are_available_and_do_not_panic() { initial_images: Vec::new(), enhanced_keys_supported: false, auth_manager, - models_manager: conversation_manager.get_models_manager(), + models_manager: thread_manager.get_models_manager(), feedback: codex_feedback::CodexFeedback::new(), is_first_run: true, model: resolved_model, }; - let mut w = ChatWidget::new(init, conversation_manager); + let mut w = 
ChatWidget::new(init, thread_manager); // Basic construction sanity. let _ = &mut w; } @@ -363,6 +363,7 @@ async fn make_chatwidget_manual( skills: None, }); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")); + let codex_home = cfg.codex_home.clone(); let widget = ChatWidget { app_event_tx, codex_op_tx: op_tx, @@ -371,7 +372,7 @@ async fn make_chatwidget_manual( config: cfg, model: resolved_model.clone(), auth_manager: auth_manager.clone(), - models_manager: Arc::new(ModelsManager::new(auth_manager)), + models_manager: Arc::new(ModelsManager::new(codex_home, auth_manager)), session_header: SessionHeader::new(resolved_model), initial_user_message: None, token_info: None, @@ -410,7 +411,10 @@ async fn make_chatwidget_manual( fn set_chatgpt_auth(chat: &mut ChatWidget) { chat.auth_manager = AuthManager::from_auth_for_testing(CodexAuth::create_dummy_chatgpt_auth_for_testing()); - chat.models_manager = Arc::new(ModelsManager::new(chat.auth_manager.clone())); + chat.models_manager = Arc::new(ModelsManager::new( + chat.config.codex_home.clone(), + chat.auth_manager.clone(), + )); } pub(crate) async fn make_chatwidget_manual_with_sender() -> ( @@ -1731,6 +1735,41 @@ async fn model_selection_popup_snapshot() { assert_snapshot!("model_selection_popup", popup); } +#[tokio::test] +async fn model_picker_hides_show_in_picker_false_models_from_cache() { + let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("test-visible-model")).await; + let preset = |slug: &str, show_in_picker: bool| ModelPreset { + id: slug.to_string(), + model: slug.to_string(), + display_name: slug.to_string(), + description: format!("{slug} description"), + default_reasoning_effort: ReasoningEffortConfig::Medium, + supported_reasoning_efforts: vec![ReasoningEffortPreset { + effort: ReasoningEffortConfig::Medium, + description: "medium".to_string(), + }], + is_default: false, + upgrade: None, + show_in_picker, + supported_in_api: true, + }; + + 
chat.open_model_popup_with_presets(vec![ + preset("test-visible-model", true), + preset("test-hidden-model", false), + ]); + let popup = render_bottom_popup(&chat, 80); + assert_snapshot!("model_picker_filters_hidden_models", popup); + assert!( + popup.contains("test-visible-model"), + "expected visible model to appear in picker:\n{popup}" + ); + assert!( + !popup.contains("test-hidden-model"), + "expected hidden model to be excluded from picker:\n{popup}" + ); +} + #[tokio::test] async fn approvals_selection_popup_snapshot() { let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await; @@ -2123,6 +2162,45 @@ async fn approval_modal_exec_without_reason_snapshot() { ); } +// Snapshot test: approval modal with a proposed execpolicy prefix that is multi-line; +// we should not offer adding it to execpolicy. +#[tokio::test] +async fn approval_modal_exec_multiline_prefix_hides_execpolicy_option_snapshot() { + let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await; + chat.config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + + let script = "python - <<'PY'\nprint('hello')\nPY".to_string(); + let command = vec!["bash".into(), "-lc".into(), script]; + let ev = ExecApprovalRequestEvent { + call_id: "call-approve-cmd-multiline-trunc".into(), + turn_id: "turn-approve-cmd-multiline-trunc".into(), + command: command.clone(), + cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)), + parsed_cmd: vec![], + }; + chat.handle_codex_event(Event { + id: "sub-approve-multiline-trunc".into(), + msg: EventMsg::ExecApprovalRequest(ev), + }); + + let width = 100; + let height = chat.desired_height(width); + let mut terminal = + ratatui::Terminal::new(VT100Backend::new(width, height)).expect("create terminal"); + terminal.set_viewport_area(Rect::new(0, 0, width, height)); + terminal + .draw(|f| chat.render(f.area(), f.buffer_mut())) + .expect("draw 
approval modal (multiline prefix)"); + let contents = terminal.backend().vt100().screen().contents(); + assert!(!contents.contains("don't ask again")); + assert_snapshot!( + "approval_modal_exec_multiline_prefix_no_execpolicy", + contents + ); +} + // Snapshot test: patch approval modal #[tokio::test] async fn approval_modal_patch_snapshot() { diff --git a/codex-rs/tui2/src/diff_render.rs b/codex-rs/tui2/src/diff_render.rs index 1bda0e53dd8..b92f24349a8 100644 --- a/codex-rs/tui2/src/diff_render.rs +++ b/codex-rs/tui2/src/diff_render.rs @@ -299,7 +299,18 @@ fn render_change(change: &FileChange, out: &mut Vec>, width: usi } } +/// Format a path for display relative to the current working directory when +/// possible, keeping output stable in jj/no-`.git` workspaces (e.g. image +/// tool calls should show `example.png` instead of an absolute path). pub(crate) fn display_path_for(path: &Path, cwd: &Path) -> String { + if path.is_relative() { + return path.display().to_string(); + } + + if let Ok(stripped) = path.strip_prefix(cwd) { + return stripped.display().to_string(); + } + // Prefer a stable, user-local relative path when the file is under the current working // directory. This keeps output deterministic in jj-only repos (no `.git`) and matches user // expectations for "files in this project". 
@@ -431,6 +442,7 @@ fn style_del() -> Style { mod tests { use super::*; use insta::assert_snapshot; + use pretty_assertions::assert_eq; use ratatui::Terminal; use ratatui::backend::TestBackend; use ratatui::text::Text; @@ -470,6 +482,26 @@ mod tests { assert_snapshot!(name, text); } + #[test] + fn display_path_prefers_cwd_without_git_repo() { + let cwd = if cfg!(windows) { + PathBuf::from(r"C:\workspace\codex") + } else { + PathBuf::from("/workspace/codex") + }; + let path = cwd.join("tui").join("example.png"); + + let rendered = display_path_for(&path, &cwd); + + assert_eq!( + rendered, + PathBuf::from("tui") + .join("example.png") + .display() + .to_string() + ); + } + #[test] fn ui_snapshot_wrap_behavior_insert() { // Narrow width to force wrapping within our diff line rendering diff --git a/codex-rs/tui2/src/history_cell.rs b/codex-rs/tui2/src/history_cell.rs index 7696a387528..3124d0fc31f 100644 --- a/codex-rs/tui2/src/history_cell.rs +++ b/codex-rs/tui2/src/history_cell.rs @@ -119,6 +119,19 @@ pub(crate) trait HistoryCell: std::fmt::Debug + Send + Sync + Any { /// Most cells can use the default implementation (no joiners), but cells that apply wrapping /// should override this and return joiners derived from the same wrapping operation so /// clipboard reconstruction can distinguish hard breaks from soft wraps. + /// + /// `joiner_before[i]` describes the boundary *between* `lines[i - 1]` and `lines[i]`: + /// + /// - `None` means "hard break": copy inserts a newline between the two lines. + /// - `Some(joiner)` means "soft wrap continuation": copy inserts `joiner` and continues on the + /// same logical line. + /// + /// Example (one logical line wrapped across two visual lines): + /// + /// - `lines = ["• Hello", " world"]` + /// - `joiner_before = [None, Some(\" \")]` + /// + /// Copy should produce `"Hello world"` (no hard newline). 
fn transcript_lines_with_joiners(&self, width: u16) -> TranscriptLinesWithJoiners { let lines = self.transcript_lines(width); TranscriptLinesWithJoiners { @@ -313,14 +326,64 @@ impl HistoryCell for ReasoningSummaryCell { #[derive(Debug)] pub(crate) struct AgentMessageCell { - lines: Vec>, + /// Width-agnostic logical markdown lines for this chunk. + /// + /// These are produced either: + /// - by streaming (`markdown_stream` → `markdown_render::render_markdown_logical_lines`), or + /// - by legacy/non-streaming callers that pass pre-rendered `Vec` via [`Self::new`]. + /// + /// Importantly, this stores *logical* lines, not already-wrapped visual lines, so the transcript + /// can reflow on resize. + logical_lines: Vec, + /// Whether this cell should render the leading transcript bullet (`• `). + /// + /// Streaming emits multiple immutable `AgentMessageCell`s per assistant message; only the first + /// chunk shows the bullet. Continuations use a two-space gutter. is_first_line: bool, } impl AgentMessageCell { + /// Construct an agent message cell from already-rendered `Line`s. + /// + /// This is primarily used by non-streaming paths. The lines are treated as already "logical" + /// lines (no additional markdown indentation metadata is available), and wrapping is still + /// performed at render time so the transcript can reflow on resize. pub(crate) fn new(lines: Vec>, is_first_line: bool) -> Self { Self { - lines, + logical_lines: lines + .into_iter() + .map(|line| { + let is_preformatted = line.style.fg == Some(ratatui::style::Color::Cyan); + let line_style = line.style; + let content = Line { + style: Style::default(), + alignment: line.alignment, + spans: line.spans, + }; + crate::markdown_render::MarkdownLogicalLine { + content, + initial_indent: Line::default(), + subsequent_indent: Line::default(), + line_style, + is_preformatted, + } + }) + .collect(), + is_first_line, + } + } + + /// Construct an agent message cell from markdown logical lines. 
+ /// + /// This is the preferred streaming constructor: it preserves markdown indentation rules (list + /// markers, nested list continuation indent, blockquote prefix, etc.) so wrapping can be + /// performed correctly at render time for the current viewport width. + pub(crate) fn new_logical( + logical_lines: Vec, + is_first_line: bool, + ) -> Self { + Self { + logical_lines, is_first_line, } } @@ -331,43 +394,98 @@ impl HistoryCell for AgentMessageCell { self.transcript_lines_with_joiners(width).lines } + /// Render wrapped transcript lines plus soft-wrap joiners. + /// + /// This is where width-dependent wrapping happens for streaming agent output. The cell composes + /// indentation as: + /// + /// - transcript gutter (`• ` or ` `), plus + /// - markdown-provided indent/prefix spans (`initial_indent` / `subsequent_indent`) + /// + /// The wrapping algorithm returns a `joiner_before` vector so copy/paste can treat soft wraps + /// as joinable (no hard newline) while preserving exact whitespace at wrap boundaries. fn transcript_lines_with_joiners(&self, width: u16) -> TranscriptLinesWithJoiners { - use ratatui::style::Color; + if width == 0 { + return TranscriptLinesWithJoiners { + lines: Vec::new(), + joiner_before: Vec::new(), + }; + } let mut out_lines: Vec> = Vec::new(); let mut joiner_before: Vec> = Vec::new(); - let mut is_first_output_line = true; - for line in &self.lines { - let is_code_block_line = line.style.fg == Some(Color::Cyan); - let initial_indent: Line<'static> = if is_first_output_line && self.is_first_line { + // `at_cell_start` tracks whether we're about to emit the first *visual* line of this cell. + // Only the first chunk of a streamed message gets the `• ` gutter; continuations use ` `. 
+ let mut at_cell_start = true; + for logical in &self.logical_lines { + let gutter_first_visual_line: Line<'static> = if at_cell_start && self.is_first_line { "• ".dim().into() } else { " ".into() }; - let subsequent_indent: Line<'static> = " ".into(); + let gutter_continuation: Line<'static> = " ".into(); + + // Compose the transcript gutter with markdown-provided indentation: + // + // - `gutter_*` is the transcript-level prefix (`• ` / ` `). + // - `initial_indent` / `subsequent_indent` come from markdown structure (blockquote + // prefix, list marker indentation, nested list continuation indentation, etc.). + // + // We apply these indents during wrapping so: + // - the UI renders with correct continuation indentation, and + // - soft-wrap joiners stay aligned with the exact whitespace the wrapper skipped. + let compose_indent = + |gutter: &Line<'static>, md_indent: &Line<'static>| -> Line<'static> { + let mut spans = gutter.spans.clone(); + spans.extend(md_indent.spans.iter().cloned()); + Line::from(spans) + }; - if is_code_block_line { - let mut spans = initial_indent.spans; - spans.extend(line.spans.iter().cloned()); - out_lines.push(Line::from(spans).style(line.style)); + // Preformatted lines are rendered as a single visual line (no wrapping). + // This preserves code-block whitespace and keeps code copy behavior stable. + if logical.is_preformatted { + let mut spans = gutter_first_visual_line.spans.clone(); + spans.extend(logical.initial_indent.spans.iter().cloned()); + spans.extend(logical.content.spans.iter().cloned()); + out_lines.push(Line::from(spans).style(logical.line_style)); joiner_before.push(None); - is_first_output_line = false; + at_cell_start = false; continue; } + // Prose path: wrap to current width and capture joiners. 
+ // + // `word_wrap_line_with_joiners` guarantees: + // - `wrapped.len() == wrapped_joiners.len()` + // - `wrapped_joiners[0] == None` (first visual segment of a logical line is a hard break) + // - subsequent entries are `Some(joiner)` (soft-wrap continuations). let opts = RtOptions::new(width as usize) - .initial_indent(initial_indent) - .subsequent_indent(subsequent_indent.clone()); + .initial_indent(compose_indent( + &gutter_first_visual_line, + &logical.initial_indent, + )) + .subsequent_indent(compose_indent( + &gutter_continuation, + &logical.subsequent_indent, + )); + let (wrapped, wrapped_joiners) = - crate::wrapping::word_wrap_line_with_joiners(line, opts); - for (l, j) in wrapped.into_iter().zip(wrapped_joiners) { - out_lines.push(line_to_static(&l)); - joiner_before.push(j); - is_first_output_line = false; + crate::wrapping::word_wrap_line_with_joiners(&logical.content, opts); + for (visual, joiner) in wrapped.into_iter().zip(wrapped_joiners) { + out_lines.push(line_to_static(&visual).style(logical.line_style)); + joiner_before.push(joiner); + at_cell_start = false; } } + debug_assert_eq!(out_lines.len(), joiner_before.len()); + debug_assert!( + joiner_before + .first() + .is_none_or(std::option::Option::is_none) + ); + TranscriptLinesWithJoiners { lines: out_lines, joiner_before, @@ -699,11 +817,11 @@ pub(crate) fn padded_emoji(emoji: &str) -> String { #[derive(Debug)] struct TooltipHistoryCell { - tip: &'static str, + tip: String, } impl TooltipHistoryCell { - fn new(tip: &'static str) -> Self { + fn new(tip: String) -> Self { Self { tip } } } @@ -1674,6 +1792,131 @@ mod tests { render_lines(&cell.transcript_lines(u16::MAX)) } + /// Remove a single leading markdown blockquote marker (`> `) from `line`. + /// + /// This is a test-only normalization helper. + /// + /// In the rendered transcript, blockquote indentation is represented as literal `> ` spans in + /// the line prefix. 
For wrapped blockquote prose, those prefix spans can appear on every visual + /// line (including soft-wrap continuations). When we want to compare the *logical* joined text + /// across different widths, we strip the repeated marker on continuation lines so the + /// comparison doesn't fail due to prefix duplication. + fn strip_leading_blockquote_marker(line: &str) -> String { + let mut out = String::with_capacity(line.len()); + let mut seen_non_space = false; + let mut removed = false; + let mut chars = line.chars().peekable(); + while let Some(ch) = chars.next() { + if !seen_non_space { + if ch == ' ' { + out.push(ch); + continue; + } + seen_non_space = true; + if ch == '>' && !removed { + removed = true; + if matches!(chars.peek(), Some(' ')) { + chars.next(); + } + continue; + } + } + out.push(ch); + } + out + } + + /// Normalize rendered transcript output into a width-insensitive "logical text" string. + /// + /// This is used by resize/reflow tests: + /// + /// - Joiners tell us which visual line breaks are soft wraps (`Some(joiner)`) vs hard breaks + /// (`None`). + /// - For soft-wrap continuation lines, we strip repeated blockquote markers so we can compare + /// the underlying prose independent of prefix repetition. + /// - Finally, we collapse whitespace so wrapping differences (line breaks vs spaces) do not + /// affect equality. + fn normalize_rendered_text_with_joiners(tr: &TranscriptLinesWithJoiners) -> String { + let mut rendered = render_lines(&tr.lines); + for (line, joiner) in rendered.iter_mut().zip(&tr.joiner_before) { + if joiner.is_some() { + *line = strip_leading_blockquote_marker(line); + } + } + rendered + .join("\n") + .split_whitespace() + .collect::>() + .join(" ") + } + + #[test] + fn agent_message_cell_reflows_streamed_prose_on_resize() { + let md = concat!( + "- This is a long list item that should reflow when the viewport width changes. 
", + "The old streaming implementation used to bake soft wraps into hard line breaks.\n", + "> A blockquote line that is also long enough to wrap and should reflow cleanly.\n", + ); + let logical_lines = crate::markdown_stream::simulate_stream_markdown_for_tests(&[md], true); + let cell = AgentMessageCell::new_logical(logical_lines, true); + + let narrow = cell.transcript_lines_with_joiners(28); + let wide = cell.transcript_lines_with_joiners(80); + + assert!( + narrow.lines.len() > wide.lines.len(), + "expected fewer visual lines at wider width; narrow={} wide={}", + narrow.lines.len(), + wide.lines.len() + ); + assert_eq!( + normalize_rendered_text_with_joiners(&narrow), + normalize_rendered_text_with_joiners(&wide) + ); + + let snapshot = format!( + "narrow:\n{}\n\nwide:\n{}", + render_lines(&narrow.lines).join("\n"), + render_lines(&wide.lines).join("\n") + ); + insta::assert_snapshot!("agent_message_cell_reflow_on_resize", snapshot); + } + + #[test] + fn agent_message_cell_reflows_streamed_prose_vt100_snapshot() { + use crate::test_backend::VT100Backend; + + let md = concat!( + "- This is a long list item that should reflow when the viewport width changes.\n", + "> A blockquote that also reflows across widths.\n", + ); + let logical_lines = crate::markdown_stream::simulate_stream_markdown_for_tests(&[md], true); + let cell = AgentMessageCell::new_logical(logical_lines, true); + + let render = |width, height| -> String { + let backend = VT100Backend::new(width, height); + let mut terminal = ratatui::Terminal::new(backend).expect("terminal"); + terminal + .draw(|f| { + let area = f.area(); + let lines = cell.display_lines(area.width); + Paragraph::new(Text::from(lines)) + .wrap(Wrap { trim: false }) + .render(area, f.buffer_mut()); + }) + .expect("draw"); + terminal.backend().vt100().screen().contents() + }; + + let narrow = render(30, 12); + let wide = render(70, 12); + + insta::assert_snapshot!( + "agent_message_cell_reflow_on_resize_vt100", + 
format!("narrow:\n{narrow}\n\nwide:\n{wide}") + ); + } + #[tokio::test] async fn mcp_tools_output_masks_sensitive_values() { let mut config = test_config().await; diff --git a/codex-rs/tui2/src/insert_history.rs b/codex-rs/tui2/src/insert_history.rs index 86a32c16ce8..1b236e8eec3 100644 --- a/codex-rs/tui2/src/insert_history.rs +++ b/codex-rs/tui2/src/insert_history.rs @@ -1,9 +1,13 @@ -//! Render `ratatui` transcript lines into terminal scrollback. +//! Render `ratatui` transcript lines into terminal output (scrollback) and/or deterministic ANSI. //! //! `insert_history_lines` is responsible for inserting rendered transcript lines //! *above* the TUI viewport by emitting ANSI control sequences through the //! terminal backend writer. //! +//! Note: the current `tui2` main draw loop does not call `insert_history_lines` (see +//! `codex-rs/tui2/src/tui.rs`). This module is still used for deterministic ANSI emission via +//! `write_spans` (e.g., "print after exit" flows) and for tests. +//! //! ## Why we use crossterm style commands //! //! `write_spans` is also used by non-terminal callers (e.g. 
diff --git a/codex-rs/tui2/src/lib.rs b/codex-rs/tui2/src/lib.rs index 3c5ac92f6c3..1c161bf6278 100644 --- a/codex-rs/tui2/src/lib.rs +++ b/codex-rs/tui2/src/lib.rs @@ -19,7 +19,7 @@ use codex_core::config::ConfigOverrides; use codex_core::config::find_codex_home; use codex_core::config::load_config_as_toml_with_cli_overrides; use codex_core::config::resolve_oss_provider; -use codex_core::find_conversation_path_by_id_str; +use codex_core::find_thread_path_by_id_str; use codex_core::get_platform_sandbox; use codex_core::protocol::AskForApproval; use codex_protocol::config_types::SandboxMode; @@ -81,6 +81,8 @@ mod transcript_copy_action; mod transcript_copy_ui; mod transcript_multi_click; mod transcript_render; +mod transcript_scrollbar; +mod transcript_scrollbar_ui; mod transcript_selection; mod transcript_view_cache; mod tui; @@ -359,6 +361,8 @@ async fn run_ratatui_app( ) -> color_eyre::Result { color_eyre::install()?; + tooltips::announcement::prewarm(); + // Forward panic reports through tracing so they appear in the UI status // line, but do not swallow the default/color-eyre panic handler. // Chain to the previous hook so users still get a rich panic report @@ -446,7 +450,7 @@ async fn run_ratatui_app( // Determine resume behavior: explicit id, then resume last, then picker. let resume_selection = if let Some(id_str) = cli.resume_session_id.as_deref() { - match find_conversation_path_by_id_str(&config.codex_home, id_str).await? { + match find_thread_path_by_id_str(&config.codex_home, id_str).await? 
{ Some(path) => resume_picker::ResumeSelection::Resume(path), None => { error!("Error finding conversation path: {id_str}"); @@ -469,7 +473,7 @@ async fn run_ratatui_app( } } else if cli.resume_last { let provider_filter = vec![config.model_provider_id.clone()]; - match RolloutRecorder::list_conversations( + match RolloutRecorder::list_threads( &config.codex_home, 1, None, diff --git a/codex-rs/tui2/src/markdown_render.rs b/codex-rs/tui2/src/markdown_render.rs index 22a234326aa..c285ca41a45 100644 --- a/codex-rs/tui2/src/markdown_render.rs +++ b/codex-rs/tui2/src/markdown_render.rs @@ -1,3 +1,37 @@ +//! Markdown rendering for `tui2`. +//! +//! This module has two related but intentionally distinct responsibilities: +//! +//! 1. **Parse Markdown into styled text** (for display). +//! 2. **Preserve width-agnostic structure for reflow** (for streaming + resize). +//! +//! ## Why logical lines exist +//! +//! TUI2 supports viewport resize reflow and copy/paste that treats soft-wrapped prose as a single +//! logical line. If we apply wrapping while rendering and store the resulting `Vec`, those +//! width-derived breaks become indistinguishable from hard newlines and cannot be "unwrapped" when +//! the viewport gets wider. +//! +//! To avoid baking width, streaming uses [`MarkdownLogicalLine`] output: +//! +//! - `content` holds the styled spans for a single *logical* line (a hard break boundary). +//! - `initial_indent` / `subsequent_indent` encode markdown-aware indentation rules for wraps +//! (list markers, nested lists, blockquotes, etc.). +//! - `line_style` captures line-level styling (e.g., blockquote green) that must apply to all +//! wrapped segments. +//! - `is_preformatted` marks runs that should not be wrapped like prose (e.g., fenced code). +//! +//! History cells can then wrap `content` at the *current* width, applying indents appropriately and +//! returning soft-wrap joiners for correct copy/paste. +//! +//! ## Outputs +//! +//! 
- [`render_markdown_text_with_width`]: emits a `Text` suitable for immediate display and may +//! apply wrapping if a width is provided. +//! - [`render_markdown_logical_lines`]: emits width-agnostic logical lines (no wrapping). +//! +//! The underlying `Writer` can emit either (or both) depending on call site needs. + use crate::render::line_utils::line_to_static; use crate::wrapping::RtOptions; use crate::wrapping::word_wrap_line; @@ -14,6 +48,31 @@ use ratatui::text::Line; use ratatui::text::Span; use ratatui::text::Text; +/// A single width-agnostic markdown "logical line" plus the metadata required to wrap it later. +/// +/// A logical line is a hard-break boundary produced by markdown parsing (explicit newlines, +/// paragraph boundaries, list item boundaries, etc.). It is not a viewport-derived wrap segment. +/// +/// Wrapping is performed later (typically in `HistoryCell::transcript_lines_with_joiners(width)`), +/// where a cell can: +/// +/// - prepend a transcript gutter prefix (`• ` / ` `), +/// - prepend markdown-specific indents (`initial_indent` / `subsequent_indent`), and +/// - wrap `content` to the current width while producing joiners for copy/paste. +#[derive(Clone, Debug)] +pub(crate) struct MarkdownLogicalLine { + /// The raw content for this logical line (does not include markdown prefix/indent spans). + pub(crate) content: Line<'static>, + /// Prefix/indent spans to apply to the first visual line when wrapping. + pub(crate) initial_indent: Line<'static>, + /// Prefix/indent spans to apply to wrapped continuation lines. + pub(crate) subsequent_indent: Line<'static>, + /// Line-level style to apply to all wrapped segments. + pub(crate) line_style: Style, + /// True when this line is preformatted and should not be wrapped like prose. 
+ pub(crate) is_preformatted: bool, +} + struct MarkdownStyles { h1: Style, h2: Style, @@ -56,8 +115,12 @@ impl Default for MarkdownStyles { #[derive(Clone, Debug)] struct IndentContext { + /// Prefix spans to apply for this nesting level (e.g., blockquote `> `, list indentation). prefix: Vec>, + /// Optional list marker spans (e.g., `- ` or `1. `) that apply only to the first visual line of + /// a list item. marker: Option>>, + /// True if this context represents a list indentation level. is_list: bool, } @@ -75,21 +138,45 @@ pub fn render_markdown_text(input: &str) -> Text<'static> { render_markdown_text_with_width(input, None) } +/// Render markdown into a ratatui `Text`, optionally wrapping to a specific width. +/// +/// This is primarily used for non-streaming rendering where storing width-derived wrapping is +/// acceptable or where the caller immediately consumes the output. pub(crate) fn render_markdown_text_with_width(input: &str, width: Option) -> Text<'static> { let mut options = Options::empty(); options.insert(Options::ENABLE_STRIKETHROUGH); let parser = Parser::new_ext(input, options); - let mut w = Writer::new(parser, width); + let mut w = Writer::new(parser, width, true, false); w.run(); w.text } +/// Render markdown into width-agnostic logical lines (no wrapping). +/// +/// This is used by streaming so that the transcript can reflow on resize: wrapping is deferred to +/// the history cell at render time. +pub(crate) fn render_markdown_logical_lines(input: &str) -> Vec { + let mut options = Options::empty(); + options.insert(Options::ENABLE_STRIKETHROUGH); + let parser = Parser::new_ext(input, options); + let mut w = Writer::new(parser, None, false, true); + w.run(); + w.logical_lines +} + +/// A markdown event sink that builds either: +/// - a wrapped `Text` (`emit_text = true`), and/or +/// - width-agnostic [`MarkdownLogicalLine`]s (`emit_logical_lines = true`). 
+/// +/// The writer tracks markdown structure (paragraphs, lists, blockquotes, code blocks) and builds up +/// a "current logical line". `flush_current_line` commits it to the selected output(s). struct Writer<'a, I> where I: Iterator>, { iter: I, text: Text<'static>, + logical_lines: Vec, styles: MarkdownStyles, inline_styles: Vec