From d5e081948cd7f7106163015bd90ffea65af1ae71 Mon Sep 17 00:00:00 2001
From: Wes <wesbillman@users.noreply.github.com>
Date: Fri, 3 Jul 2026 09:38:48 -0600
Subject: [PATCH] fix(agent): honor stop hook retry budget

Remove the premature consecutive-end bypass so persistent Stop hook
objections keep the run active until the configured, bounded rejection
budget is exhausted. Also update the base prompt to tell agents to create
an open todo before acknowledging work that requires follow-up.

Co-authored-by: Pinky <44b8e82baa6e0e254e0208d68f335c283c94e7b78dd1fa10d5a49d3f13dd0435@sprout-oss.stage.blox.sqprod.co>
Signed-off-by: Wes <wesbillman@users.noreply.github.com>
---
 crates/buzz-acp/src/base_prompt.md     |  1 +
 crates/buzz-agent/src/agent.rs         | 14 ------------
 crates/buzz-agent/tests/regressions.rs | 30 +++++++++++++++-----------
 3 files changed, 18 insertions(+), 27 deletions(-)
diff --git a/crates/buzz-acp/src/base_prompt.md b/crates/buzz-acp/src/base_prompt.md
index 6da252ec1..2b38dcf6e 100644
--- a/crates/buzz-acp/src/base_prompt.md
+++ b/crates/buzz-acp/src/base_prompt.md
@@ -48,6 +48,7 @@ All replies and delegations — including task assignments to other agents — g
 
 - Respond promptly to @mentions. Be direct — no preamble. Name what you did, what you found, or what you need.
 - **Every turn that processes a user message MUST end with `buzz messages send`.** Your reasoning and tool calls are invisible to users — if you didn't send a message, they saw nothing. A turn that ends without a sent message is a silent failure.
+- For work that requires follow-up tools, create an open todo **before** sending the pickup acknowledgment. Keep it open until the deliverable is verified and you have sent a completion or blocker message; never end a turn while promised work remains open.
 - Use GitHub-flavored Markdown. Fenced code blocks with language tags for syntax highlighting.
 - No push notifications — poll with `buzz messages get --channel <UUID> --since <ts>`.
 - Address people by the name in their own message header.
diff --git a/crates/buzz-agent/src/agent.rs b/crates/buzz-agent/src/agent.rs
index 163a4070e..69019bce9 100644
--- a/crates/buzz-agent/src/agent.rs
+++ b/crates/buzz-agent/src/agent.rs
@@ -75,10 +75,6 @@ impl RunCtx<'_> {
         self.history.push(HistoryItem::User(user_text));
 
         let mut round = 0u32;
-        // Per-prompt latch: only used to detect "LLM said end_turn twice
-        // in a row with no tool calls between" within this single prompt.
-        // The cumulative rejection budget lives on the session.
-        let mut last_was_end_turn = false;
         loop {
             if self.cfg.max_rounds > 0 && round >= self.cfg.max_rounds {
                 return Ok(StopReason::MaxTurnRequests);
@@ -204,12 +200,6 @@ impl RunCtx<'_> {
                 let stop = map_stop(response.stop);
                 // Only gate genuine end_turn — don't override max_tokens/refusal.
                 if stop == StopReason::EndTurn {
-                    // Consecutive-rejection rule: LLM responded to our last
-                    // objection with no tool calls — accept the end and
-                    // move on rather than loop forever.
-                    if last_was_end_turn {
-                        return Ok(stop);
-                    }
                     if *self.stop_rejections >= self.cfg.stop_max_rejections {
                         return Ok(stop);
                     }
@@ -224,7 +214,6 @@ impl RunCtx<'_> {
                         .await;
                     if !objections.is_empty() {
                         *self.stop_rejections = self.stop_rejections.saturating_add(1);
-                        last_was_end_turn = true;
                         push_hook_outputs_as_tool_results(self.history, "_Stop", &objections);
                         continue;
                     }
@@ -245,9 +234,6 @@ impl RunCtx<'_> {
                 tool_calls: calls.clone(),
             });
 
-            // Tool calls executed → reset the consecutive-rejection latch.
-            last_was_end_turn = false;
-
             if let Some(stop) = self.execute_calls(&calls).await {
                 return Ok(stop);
             }
diff --git a/crates/buzz-agent/tests/regressions.rs b/crates/buzz-agent/tests/regressions.rs
index 0bc05b7b6..c73a88c5f 100644
--- a/crates/buzz-agent/tests/regressions.rs
+++ b/crates/buzz-agent/tests/regressions.rs
@@ -888,21 +888,25 @@ async fn hook_stop_budget_exhausted() {
     h.shutdown().await;
 }
 
-/// Consecutive-rejection rule: if the LLM responds to an objection with
-/// no tool calls and end_turn again, the agent accepts the end (avoids
-/// infinite loops with an unreasonable hook).
+/// A persistent `_Stop` objection must keep the turn alive through repeated
+/// consecutive end_turn responses. The configured rejection budget is the
+/// bounded escape hatch; accepting the second response would silently idle a
+/// session that still has open work.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn hook_stop_consecutive_end_turn() {
-    // LLM sequence:
-    //   1. text → _Stop objects (rejections: 0→1, last_was_end_turn=true)
-    //   2. text again, no tool calls → consecutive rule fires, return end_turn
-    let llm = spawn_capturing_llm(vec![openai_text("done-1"), openai_text("done-2")]).await;
+async fn hook_stop_consecutive_end_turn_uses_rejection_budget() {
+    // Three consecutive end_turn responses. With max=2, the first two each draw
+    // an objection and re-prompt the LLM; the third is accepted by the budget cap.
+    let llm = spawn_capturing_llm(vec![
+        openai_text("done-1"),
+        openai_text("done-2"),
+        openai_text("done-3"),
+    ])
+    .await;
     let mut h = Harness::spawn_with_env(
         &llm.url,
         &[
             ("MCP_HOOK_SERVERS", "fake"),
-            // Set high so we don't trip the budget instead.
-            ("BUZZ_AGENT_STOP_MAX_REJECTIONS", "10"),
+            ("BUZZ_AGENT_STOP_MAX_REJECTIONS", "2"),
         ],
     )
     .await;
@@ -926,12 +930,12 @@ async fn hook_stop_consecutive_end_turn() {
     assert!(r.get("result").is_some(), "errored: {r}");
     assert_eq!(r["result"]["stopReason"], "end_turn");
 
-    // Exactly 2 LLM calls — consecutive rule prevented a 3rd round.
+    // Both objections force another round; the third end_turn is accepted by the budget cap.
     let captured = llm.captured.lock().await;
     assert_eq!(
         captured.len(),
-        2,
-        "expected 2 LLM calls (consecutive rule), got {}",
+        3,
+        "expected 3 LLM calls (two objections, then budget cap), got {}",
         captured.len()
     );
     h.shutdown().await;