Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 30 additions & 44 deletions crates/tui/src/core/engine/tests.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use super::*;

use super::context::{COMPACTION_SUMMARY_MARKER, TURN_MAX_OUTPUT_TOKENS};
use super::turn_loop::{
auto_review_force_prompt_overrides_auto_approve, registered_tool_approval_required,
tool_error_degradation_runtime_hint,
};
use super::turn_loop::{registered_tool_approval_required, tool_error_degradation_runtime_hint};
use crate::config::ApiProvider;
use crate::models::{SystemBlock, Usage};
use crate::test_support::{EnvVarGuard, lock_test_env};
Expand Down Expand Up @@ -516,7 +513,6 @@ fn auto_review_policy_forces_prompt_for_shell_git_push() {
);
assert_eq!(audit["decision"], "hold_for_review");
assert_eq!(audit["action_kind"], "publish");
assert!(auto_review_force_prompt_overrides_auto_approve(&audit));
}

#[test]
Expand Down Expand Up @@ -2724,11 +2720,15 @@ async fn yolo_mode_does_not_prompt_for_background_shell_safety_floor() {

#[tokio::test]
#[allow(clippy::await_holding_lock)]
async fn yolo_mode_forces_prompt_for_publish_like_shell() {
// YOLO keeps ordinary/background approvals out of the way, but publish-like
// actions are deliberately durable-review holds. They must still surface a
// forced prompt even when `auto_approve` is true.
use wiremock::matchers::{body_string_contains, method, path};
async fn yolo_mode_does_not_force_prompt_for_publish_like_shell() {
// #3790: the mode is the single approval authority. YOLO (`auto_approve`)
// is a true no-prompt contract — publish-like actions no longer carve out a
// forced prompt past YOLO. The tool runs without any ApprovalRequired.
// (Agent mode still reviews publish; see the auto_review_policy_* unit
// tests.) `cargo publish --dry-run` has no Cargo.toml in the temp workspace,
// so it fails fast with no network or side effects — we only assert it ran
// unprompted.
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};

let _lock = lock_test_env();
Expand All @@ -2752,15 +2752,16 @@ async fn yolo_mode_forces_prompt_for_publish_like_shell() {
"data: [DONE]\n\n",
);

// First model turn → emit the publish-like tool call (served once). The
// follow-up turn (carrying the tool result) → the done/stop response.
Mock::given(method("POST"))
.and(path("/v1/chat/completions"))
.and(body_string_contains("denied by user"))
.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_string(done_sse),
.set_body_string(tool_call_sse),
)
.expect(1)
.up_to_n_times(1)
.with_priority(1)
.mount(&server)
.await;
Expand All @@ -2769,9 +2770,8 @@ async fn yolo_mode_forces_prompt_for_publish_like_shell() {
.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_string(tool_call_sse),
.set_body_string(done_sse),
)
.expect(1)
.with_priority(2)
.mount(&server)
.await;
Expand All @@ -2792,7 +2792,6 @@ async fn yolo_mode_forces_prompt_for_publish_like_shell() {
&api_config,
);
let run_task = tokio::spawn(engine.run());
let handle_for_approval = handle.clone();

handle
.send(Op::SendMessage {
Expand Down Expand Up @@ -2821,40 +2820,20 @@ async fn yolo_mode_forces_prompt_for_publish_like_shell() {
.await
.expect("send model turn");

let mut saw_forced_approval = false;
let mut saw_approval_request = false;
let mut saw_tool_complete = false;
let mut rx = handle.rx_event.write().await;
while let Some(event) = tokio::time::timeout(model_turn_event_timeout(), rx.recv())
.await
.expect("timed out waiting for engine event")
{
match event {
Event::ApprovalRequired {
id,
tool_name,
description,
input,
approval_force_prompt,
..
} => {
saw_forced_approval = true;
assert_eq!(tool_name, "exec_shell");
assert_eq!(input["command"], json!("cargo publish --dry-run"));
assert!(description.contains("publish-like"));
assert!(
approval_force_prompt,
"publish-like YOLO prompts must bypass TUI auto-approval"
);
handle_for_approval
.deny_tool_call(id)
.await
.expect("deny publish-like shell");
Event::ApprovalRequired { .. } => {
// YOLO must not surface any approval for a publish-like action.
saw_approval_request = true;
}
Event::ToolCallComplete { name, result, .. } if name == "exec_shell" => {
let err = result.expect_err("publish-like shell should be denied");
assert!(
err.to_string().contains("denied by user"),
"unexpected shell result: {err:?}"
);
Event::ToolCallComplete { name, .. } if name == "exec_shell" => {
saw_tool_complete = true;
}
Event::TurnComplete { status, .. } => {
assert_eq!(status, TurnOutcomeStatus::Completed);
Expand All @@ -2867,7 +2846,14 @@ async fn yolo_mode_forces_prompt_for_publish_like_shell() {

handle.send(Op::Shutdown).await.expect("shutdown engine");
run_task.await.expect("engine task");
assert!(saw_forced_approval);
assert!(
!saw_approval_request,
"YOLO must not force an approval prompt for publish-like shell (#3790)"
);
assert!(
saw_tool_complete,
"publish-like shell should run under YOLO without a prompt"
);
}

#[tokio::test]
Expand Down
33 changes: 8 additions & 25 deletions crates/tui/src/core/engine/turn_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,6 @@ pub(super) fn registered_tool_approval_required(
!auto_approve
}

/// Reports whether an auto-review audit event describes a publish action
/// that was held for review.
///
/// Returns `true` only when the event's `"decision"` field is the string
/// `"hold_for_review"` and its `"action_kind"` field is the string
/// `"publish"`. A missing field, or a field that is not a JSON string,
/// yields `false`.
pub(super) fn auto_review_force_prompt_overrides_auto_approve(
    audit_event: &serde_json::Value,
) -> bool {
    /// Reads `key` from `event` as a string slice, if present and a string.
    fn str_field<'a>(event: &'a serde_json::Value, key: &str) -> Option<&'a str> {
        event.get(key)?.as_str()
    }

    matches!(str_field(audit_event, "decision"), Some("hold_for_review"))
        && matches!(str_field(audit_event, "action_kind"), Some("publish"))
}

pub(super) fn tool_error_degradation_runtime_hint(
consecutive_tool_error_steps: u32,
step_error_tool_names: &[String],
Expand Down Expand Up @@ -1828,18 +1815,14 @@ impl Engine {
match decision {
AutoReviewPlanDecision::NoChange => {}
AutoReviewPlanDecision::ForcePrompt(reason) => {
// YOLO mode (auto_approve) skips ordinary review
// holds, including Background+Destructive shell
// holds created by the coarse shell risk fallback.
// Publish-like actions are different: the
// safety_floor marks them as durable-review holds
// regardless of mode, so they must still surface a
// forced prompt. A Block decision (typed deny
// rules / hard blocks) still holds below
// regardless of mode.
if !self.session.auto_approve
|| auto_review_force_prompt_overrides_auto_approve(&audit_event)
{
// The mode is the single approval authority (#3790):
// YOLO (auto_approve) skips ALL heuristic review
// holds, including the publish-like safety floor —
// there is no longer a publish carve-out that forces
// a prompt past YOLO. Agent/Plan still review these.
// A Block decision (typed deny rules / hard blocks)
// still holds below regardless of mode.
if !self.session.auto_approve {
approval_required = true;
approval_description = reason;
approval_force_prompt = true;
Expand Down