Skip to content

Commit cc84e25

Browse files
committed
feat: add robustness features and update tests
- Add retry logic with exponential backoff (tenacity) - Add conversation truncation to prevent token overflow - Add tool execution timeout (30s) - Update tests for new robustness features - Update README with robustness docs and parallel execution
1 parent e5c7afd commit cc84e25

5 files changed

Lines changed: 185 additions & 25 deletions

File tree

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ A CLI-based customer support agent powered by OpenAI with SQL database and RAG t
1111
- **Policy Search**: RAG-powered search across company policies (returns, shipping, warranty)
1212
- **Conversation History**: Maintains context across multiple exchanges
1313
- **Multi-Agent SQL**: Uses generator and reviewer agents for accurate SQL queries
14+
- **Robustness**: Retry logic, conversation truncation, and tool timeouts
15+
- **CI/CD**: GitHub Actions with Python 3.11/3.12 matrix testing
1416

1517
## Architecture
1618

@@ -87,6 +89,18 @@ Think step by step:
8789

8890
This approach leverages the model's chain-of-thought capabilities for more accurate and explainable responses.
8991

92+
### Parallel Tool Execution
93+
94+
When the LLM returns multiple tool calls in a single response, they are executed **concurrently** using `asyncio.gather()` for better performance:
95+
96+
```python
97+
# Multiple tools run in parallel
98+
results = await asyncio.gather(*[
99+
self._execute_tool(item.name, item.arguments)
100+
for item in output if item.type == "function_call"
101+
])
102+
```
103+
90104
### Component Overview
91105

92106
| Component | Description |
@@ -230,6 +244,16 @@ uv run pytest --cov=. --cov-report=html
230244
2. Add the OpenAI function schema in `tools/definitions.py`
231245
3. Register the handler in `tools/router.py`
232246

247+
### Robustness Features
248+
249+
The agent includes several reliability improvements:
250+
251+
| Feature | Description |
252+
|---------|-------------|
253+
| **Retry Logic** | 3 attempts with exponential backoff for transient API failures |
254+
| **Conversation Truncation** | Keeps last 40 items (~20 turns) to prevent token overflow |
255+
| **Tool Timeout** | 30-second timeout prevents hanging on slow tool execution |
256+
233257
## Sample Data
234258

235259
The agent comes pre-seeded with:

agent/core.py

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@
33
import json
44
from typing import Any, Callable, cast
55

6-
from openai import AsyncOpenAI
6+
from openai import AsyncOpenAI, APIError, APIConnectionError, RateLimitError
77
from openai.types.responses import ResponseInputItemParam, EasyInputMessageParam
8+
from tenacity import (
9+
retry,
10+
stop_after_attempt,
11+
wait_exponential,
12+
retry_if_exception_type,
13+
)
814

915
from tools import TOOLS, handle_tool_call
1016

@@ -14,6 +20,10 @@
1420
# Callback type for agent activity notifications
1521
AgentCallback = Callable[[str, str, dict], None]
1622

23+
# Configuration
24+
MAX_CONVERSATION_ITEMS = 40 # ~20 turns (user + assistant)
25+
TOOL_TIMEOUT_SECONDS = 30.0
26+
1727

1828
INSTRUCTIONS = """You are a customer support agent for an e-commerce company.
1929
@@ -75,14 +85,11 @@ async def chat(self, user_message: str) -> str:
7585
user_msg: EasyInputMessageParam = {"role": "user", "content": user_message}
7686
self.conversation.append(user_msg)
7787

78-
# Call OpenAI Responses API
79-
response = await self.client.responses.create(
80-
model=self.model,
81-
instructions=INSTRUCTIONS,
82-
input=self.conversation,
83-
tools=TOOLS,
84-
reasoning={"effort": "medium"},
85-
)
88+
# Truncate conversation if too long to avoid token limits
89+
self._truncate_conversation()
90+
91+
# Call OpenAI Responses API with retry
92+
response = await self._call_api()
8693

8794
# Process output items - handle function calls
8895
while self._has_function_calls(response.output):
@@ -95,19 +102,35 @@ async def chat(self, user_message: str) -> str:
95102
)
96103
self.conversation.extend(tool_outputs)
97104

98-
response = await self.client.responses.create(
99-
model=self.model,
100-
instructions=INSTRUCTIONS,
101-
input=self.conversation,
102-
tools=TOOLS,
103-
reasoning={"effort": "medium"},
104-
)
105+
response = await self._call_api()
105106

106107
# Add final response to conversation history
107108
self.conversation.extend(cast(list[ResponseInputItemParam], response.output))
108109

109110
return response.output_text
110111

112+
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    retry=retry_if_exception_type((APIError, APIConnectionError, RateLimitError)),
    reraise=True,
)
async def _call_api(self):
    """Issue a single Responses API request with automatic retries.

    Transient failures (APIError, APIConnectionError, RateLimitError) are
    retried up to 3 attempts with exponential backoff between 1s and 10s;
    the last exception is re-raised to the caller.
    """
    # Assemble the request once so the call site stays readable.
    request_kwargs = {
        "model": self.model,
        "instructions": INSTRUCTIONS,
        "input": self.conversation,
        "tools": TOOLS,
        "reasoning": {"effort": "medium"},
    }
    return await self.client.responses.create(**request_kwargs)
127+
128+
def _truncate_conversation(self) -> None:
    """Drop the oldest history entries once the cap is exceeded.

    Keeps only the newest MAX_CONVERSATION_ITEMS items so the prompt
    stays within model token limits; shorter histories are untouched.

    NOTE(review): truncating by raw count can strip a function_call while
    keeping its function_call_output (or vice versa) — confirm the
    Responses API tolerates orphaned tool items at the history head.
    """
    excess = len(self.conversation) - MAX_CONVERSATION_ITEMS
    if excess > 0:
        # Rebind to a fresh list holding only the most recent items.
        self.conversation = self.conversation[excess:]
133+
111134
def _has_function_calls(self, output: list[Any]) -> bool:
112135
"""Check if output contains any function calls."""
113136
return any(item.type == "function_call" for item in output)
@@ -139,13 +162,19 @@ async def _process_function_calls(
139162
)
140163

141164
async def _execute_tool(self, name: str, arguments: str) -> str:
    """Run one tool call, bounded by TOOL_TIMEOUT_SECONDS.

    Parses the JSON *arguments*, fires the optional on_tool_call
    callback, and dispatches to handle_tool_call. On timeout an error
    string is returned instead of raising, so the agent loop continues.

    NOTE(review): this relies on `asyncio` being imported at module
    level — the diff adding this code does not add the import; verify
    agent/core.py actually imports asyncio.
    """
    parsed = json.loads(arguments)

    # Notify any registered observer before executing.
    callback = self.on_tool_call
    if callback:
        callback(name, parsed)

    try:
        return await asyncio.wait_for(
            handle_tool_call(name, parsed, self.on_agent_activity),
            timeout=TOOL_TIMEOUT_SECONDS,
        )
    except asyncio.TimeoutError:
        return f"Error: Tool '{name}' timed out after {TOOL_TIMEOUT_SECONDS}s"
149178

150179
def clear_history(self) -> None:
151180
"""Clear conversation history."""

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies = [
1010
"openai>=2.14.0",
1111
"python-dotenv>=1.2.1",
1212
"rich>=14.2.0",
13+
"tenacity>=9.1.2",
1314
]
1415

1516
[dependency-groups]

tests/test_agent_core.py

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
"""Tests for the main SupportAgent."""
22

3+
import asyncio
4+
from typing import cast
35
from unittest.mock import AsyncMock, MagicMock, patch
46

57
import pytest
68

9+
from openai import APIConnectionError
10+
from openai.types.responses import EasyInputMessageParam
11+
712

813
class MockOutputItem:
914
"""Mock for response output items."""
@@ -93,11 +98,10 @@ async def test_handles_function_calls(self):
9398
"function_call",
9499
call_id="call_123",
95100
name="search_policies",
96-
arguments='{"question": "return policy"}'
101+
arguments='{"question": "return policy"}',
97102
)
98103
first_response = MockResponse(
99-
output_text="",
100-
output=[function_call_output]
104+
output_text="", output=[function_call_output]
101105
)
102106

103107
# Second response is final message
@@ -135,9 +139,11 @@ def mock_callback(name, args):
135139
"function_call",
136140
call_id="call_123",
137141
name="search_policies",
138-
arguments='{"question": "test"}'
142+
arguments='{"question": "test"}',
143+
)
144+
first_response = MockResponse(
145+
output_text="", output=[function_call_output]
139146
)
140-
first_response = MockResponse(output_text="", output=[function_call_output])
141147
final_response = MockResponse(output_text="Done")
142148

143149
mock_client.responses.create = AsyncMock(
@@ -167,13 +173,13 @@ async def test_multiple_function_calls(self):
167173
"function_call",
168174
call_id="call_1",
169175
name="search_policies",
170-
arguments='{"question": "returns"}'
176+
arguments='{"question": "returns"}',
171177
)
172178
call2 = MockOutputItem(
173179
"function_call",
174180
call_id="call_2",
175181
name="query_orders_database",
176-
arguments='{"query": "SELECT 1"}'
182+
arguments='{"query": "SELECT 1"}',
177183
)
178184
first_response = MockResponse(output_text="", output=[call1, call2])
179185
final_response = MockResponse(output_text="Here's the info")
@@ -242,3 +248,101 @@ def test_has_function_calls_mixed(self):
242248

243249
assert agent._has_function_calls(output) is True
244250

251+
def test_truncate_conversation_when_over_limit(self):
    """A conversation past the cap is cut down to exactly the cap."""
    with patch("agent.core.AsyncOpenAI"):
        from agent.core import SupportAgent, MAX_CONVERSATION_ITEMS

        agent = SupportAgent()
        # Fill history well past the configured limit.
        for idx in range(MAX_CONVERSATION_ITEMS + 10):
            entry: EasyInputMessageParam = {"role": "user", "content": f"msg {idx}"}
            agent.conversation.append(entry)

        agent._truncate_conversation()

        assert len(agent.conversation) == MAX_CONVERSATION_ITEMS
266+
def test_truncate_conversation_keeps_recent(self):
    """Truncation drops the oldest entries and preserves the newest."""
    with patch("agent.core.AsyncOpenAI"):
        from agent.core import SupportAgent, MAX_CONVERSATION_ITEMS

        agent = SupportAgent()
        extra = 5
        for idx in range(MAX_CONVERSATION_ITEMS + extra):
            entry: EasyInputMessageParam = {"role": "user", "content": f"msg {idx}"}
            agent.conversation.append(entry)

        agent._truncate_conversation()

        # The first `extra` messages are gone; the remainder is intact.
        oldest = cast(EasyInputMessageParam, agent.conversation[0])
        newest = cast(EasyInputMessageParam, agent.conversation[-1])
        assert oldest.get("content") == "msg 5"
        assert newest.get("content") == f"msg {MAX_CONVERSATION_ITEMS + 4}"
285+
def test_truncate_conversation_no_op_when_under_limit(self):
    """A short conversation is left completely untouched."""
    with patch("agent.core.AsyncOpenAI"):
        from agent.core import SupportAgent

        agent = SupportAgent()
        for text in ("msg 1", "msg 2"):
            entry: EasyInputMessageParam = {"role": "user", "content": text}
            agent.conversation.append(entry)

        agent._truncate_conversation()

        assert len(agent.conversation) == 2
300+
@pytest.mark.asyncio
async def test_tool_timeout_returns_error(self):
    """A tool exceeding the timeout yields an error string, not a hang."""
    with (
        patch("agent.core.AsyncOpenAI") as mock_openai,
        patch("agent.core.handle_tool_call") as mock_handle,
        patch("agent.core.TOOL_TIMEOUT_SECONDS", 0.01),
    ):
        mock_openai.return_value = MagicMock()

        # Handler that cannot finish within the (patched) 0.01s budget.
        async def never_finishes(*args, **kwargs):
            await asyncio.sleep(1)
            return "result"

        mock_handle.side_effect = never_finishes

        from agent.core import SupportAgent

        agent = SupportAgent()
        outcome = await agent._execute_tool(
            "search_policies", '{"question": "test"}'
        )

        assert "timed out" in outcome
325+
@pytest.mark.asyncio
async def test_api_retry_on_transient_error(self):
    """Transient connection errors are retried until the call succeeds.

    NOTE(review): the real tenacity backoff (min=1s) makes this test
    sleep ~3s; consider patching the retry wait to speed it up.
    """
    with patch("agent.core.AsyncOpenAI") as mock_openai:
        mock_client = MagicMock()
        mock_openai.return_value = mock_client

        # Two connection failures, then a good response.
        ok_response = MockResponse(output_text="Success after retry")
        attempts = [
            APIConnectionError(request=MagicMock()),
            APIConnectionError(request=MagicMock()),
            ok_response,
        ]
        mock_client.responses.create = AsyncMock(side_effect=attempts)

        from agent.core import SupportAgent

        agent = SupportAgent()
        reply = await agent.chat("Hello")

        assert reply == "Success after retry"
        assert mock_client.responses.create.call_count == 3

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)