From d5e081948cd7f7106163015bd90ffea65af1ae71 Mon Sep 17 00:00:00 2001 From: Wes Date: Fri, 3 Jul 2026 09:38:48 -0600 Subject: [PATCH] fix(agent): honor stop hook retry budget Remove the premature consecutive-end bypass so persistent Stop hook objections keep the run active until the configured bounded budget is exhausted. Tell agents to establish todo state before promising follow-up. Co-authored-by: Pinky <44b8e82baa6e0e254e0208d68f335c283c94e7b78dd1fa10d5a49d3f13dd0435@sprout-oss.stage.blox.sqprod.co> Signed-off-by: Wes --- crates/buzz-acp/src/base_prompt.md | 1 + crates/buzz-agent/src/agent.rs | 14 ------------ crates/buzz-agent/tests/regressions.rs | 30 +++++++++++++++----------- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/crates/buzz-acp/src/base_prompt.md b/crates/buzz-acp/src/base_prompt.md index 6da252ec1..2b38dcf6e 100644 --- a/crates/buzz-acp/src/base_prompt.md +++ b/crates/buzz-acp/src/base_prompt.md @@ -48,6 +48,7 @@ All replies and delegations — including task assignments to other agents — g - Respond promptly to @mentions. Be direct — no preamble. Name what you did, what you found, or what you need. - **Every turn that processes a user message MUST end with `buzz messages send`.** Your reasoning and tool calls are invisible to users — if you didn't send a message, they saw nothing. A turn that ends without a sent message is a silent failure. +- For work that requires follow-up tools, create an open todo **before** sending the pickup acknowledgment. Keep it open until the deliverable is verified and you have sent a completion or blocker message; never end a turn while promised work remains open. - Use GitHub-flavored Markdown. Fenced code blocks with language tags for syntax highlighting. - No push notifications — poll with `buzz messages get --channel --since `. - Address people by the name in their own message header. diff --git a/crates/buzz-agent/src/agent.rs b/crates/buzz-agent/src/agent.rs index 163a4070e..69019bce9 100644 --- a/crates/buzz-agent/src/agent.rs +++ b/crates/buzz-agent/src/agent.rs @@ -75,10 +75,6 @@ impl RunCtx<'_> { self.history.push(HistoryItem::User(user_text)); let mut round = 0u32; - // Per-prompt latch: only used to detect "LLM said end_turn twice - // in a row with no tool calls between" within this single prompt. - // The cumulative rejection budget lives on the session. - let mut last_was_end_turn = false; loop { if self.cfg.max_rounds > 0 && round >= self.cfg.max_rounds { return Ok(StopReason::MaxTurnRequests); @@ -204,12 +200,6 @@ impl RunCtx<'_> { let stop = map_stop(response.stop); // Only gate genuine end_turn — don't override max_tokens/refusal. if stop == StopReason::EndTurn { - // Consecutive-rejection rule: LLM responded to our last - // objection with no tool calls — accept the end and - // move on rather than loop forever. - if last_was_end_turn { - return Ok(stop); - } if *self.stop_rejections >= self.cfg.stop_max_rejections { return Ok(stop); } @@ -224,7 +214,6 @@ impl RunCtx<'_> { .await; if !objections.is_empty() { *self.stop_rejections = self.stop_rejections.saturating_add(1); - last_was_end_turn = true; push_hook_outputs_as_tool_results(self.history, "_Stop", &objections); continue; } @@ -245,9 +234,6 @@ impl RunCtx<'_> { tool_calls: calls.clone(), }); - // Tool calls executed → reset the consecutive-rejection latch. - last_was_end_turn = false; - if let Some(stop) = self.execute_calls(&calls).await { return Ok(stop); } diff --git a/crates/buzz-agent/tests/regressions.rs b/crates/buzz-agent/tests/regressions.rs index 0bc05b7b6..c73a88c5f 100644 --- a/crates/buzz-agent/tests/regressions.rs +++ b/crates/buzz-agent/tests/regressions.rs @@ -888,21 +888,25 @@ async fn hook_stop_budget_exhausted() { h.shutdown().await; } -/// Consecutive-rejection rule: if the LLM responds to an objection with -/// no tool calls and end_turn again, the agent accepts the end (avoids -/// infinite loops with an unreasonable hook). +/// A persistent `_Stop` objection must keep the turn alive through repeated +/// consecutive end_turn responses. The configured rejection budget is the +/// bounded escape hatch; accepting the second response would silently idle a +/// session that still has open work. #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn hook_stop_consecutive_end_turn() { - // LLM sequence: - // 1. text → _Stop objects (rejections: 0→1, last_was_end_turn=true) - // 2. text again, no tool calls → consecutive rule fires, return end_turn - let llm = spawn_capturing_llm(vec![openai_text("done-1"), openai_text("done-2")]).await; +async fn hook_stop_consecutive_end_turn_uses_rejection_budget() { + // Three consecutive end_turn responses. With max=2, both objections must + // reroll the LLM and the third response is accepted by the budget cap. + let llm = spawn_capturing_llm(vec![ + openai_text("done-1"), + openai_text("done-2"), + openai_text("done-3"), + ]) + .await; let mut h = Harness::spawn_with_env( &llm.url, &[ ("MCP_HOOK_SERVERS", "fake"), - // Set high so we don't trip the budget instead. - ("BUZZ_AGENT_STOP_MAX_REJECTIONS", "10"), + ("BUZZ_AGENT_STOP_MAX_REJECTIONS", "2"), ], ) .await; @@ -926,12 +930,12 @@ async fn hook_stop_consecutive_end_turn() { assert!(r.get("result").is_some(), "errored: {r}"); assert_eq!(r["result"]["stopReason"], "end_turn"); - // Exactly 2 LLM calls — consecutive rule prevented a 3rd round. + // Both objections force another round; the budget permits the third end. let captured = llm.captured.lock().await; assert_eq!( captured.len(), - 2, - "expected 2 LLM calls (consecutive rule), got {}", + 3, + "expected 3 LLM calls (two objections, then budget cap), got {}", captured.len() ); h.shutdown().await;