2 changes: 1 addition & 1 deletion sdk/ai/azure-ai-projects/assets.json
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/ai/azure-ai-projects",
"Tag": "python/ai/azure-ai-projects_febb246e47"
"Tag": "python/ai/azure-ai-projects_314598932e"
}
@@ -12,10 +12,11 @@
All tests use the same tool combination but different inputs and workflows.
"""

import json
from test_base import TestBase, servicePreparer
from devtools_testutils import recorded_by_proxy, RecordedTransport
from azure.ai.projects.models import PromptAgentDefinition, CodeInterpreterTool, CodeInterpreterToolAuto, FunctionTool

from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam

class TestAgentCodeInterpreterAndFunction(TestBase):
"""Tests for agents using Code Interpreter + Function Tool combination."""
@@ -25,6 +26,10 @@ class TestAgentCodeInterpreterAndFunction(TestBase):
def test_calculate_and_save(self, **kwargs):
"""
Test calculation with Code Interpreter and saving with Function Tool.

This test verifies that both tools are used:
1. Code Interpreter: Performs a calculation that requires actual computation
2. Function Tool: Saves the computed result
"""

model = self.test_agents_params["model_deployment_name"]
@@ -36,24 +41,25 @@ def test_calculate_and_save(self, **kwargs):
# Define function tool
func_tool = FunctionTool(
name="save_result",
description="Save analysis result",
description="Save the calculation result. Must be called to persist the result.",
parameters={
"type": "object",
"properties": {
"result": {"type": "string", "description": "The result"},
"calculation": {"type": "string", "description": "Description of the calculation"},
"result": {"type": "string", "description": "The numerical result"},
},
"required": ["result"],
"required": ["calculation", "result"],
"additionalProperties": False,
},
strict=True,
)

# Create agent
# Create agent with explicit instructions to use both tools
agent = project_client.agents.create_version(
agent_name="code-func-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Run calculations and save results.",
instructions="You are a calculator assistant. Use code interpreter to perform calculations, then ALWAYS save the result using the save_result function.",
tools=[
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
func_tool,
@@ -63,9 +69,10 @@ def test_calculate_and_save(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Use the agent
# Request a calculation that requires Code Interpreter (not trivial math)
# 17^4 = 83521 - not something easily computed mentally
response = openai_client.responses.create(
input="Calculate 5 + 3 and save the result.",
input="Calculate 17 to the power of 4 using code, then save the result.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
self.validate_response(response)
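
The visible hunk ends before the function-call round trip, but the newly added imports (`json`, `FunctionCallOutput`) suggest the collapsed portion of this test feeds a stubbed `save_result` output back to the model. The following is an illustrative sketch only, not part of the PR, assuming the first response contains a `function_call` output item (identifier names like `function_call` and `follow_up` are invented here; `response`, `openai_client`, and `agent` come from the test above):

```python
# Illustrative sketch - not part of the diff. Assumes the model emitted a
# `function_call` item for the save_result tool in response.output.
function_call = next(item for item in response.output if item.type == "function_call")
arguments = json.loads(function_call.arguments)  # e.g. {"calculation": "...", "result": "83521"}

# Return a stubbed tool result so the model can produce its final answer.
follow_up = openai_client.responses.create(
    input=[
        FunctionCallOutput(
            type="function_call_output",
            call_id=function_call.call_id,
            output=json.dumps({"status": "saved", "result": arguments["result"]}),
        )
    ],
    previous_response_id=response.id,
    extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
print(follow_up.output_text)
```
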
@@ -79,6 +86,10 @@ def test_generate_data_and_report(self, **kwargs):
def test_generate_data_and_report(self, **kwargs):
"""
Test generating data with Code Interpreter and reporting with Function.

This test verifies that both tools are used:
1. Code Interpreter: Generates random data and calculates statistics
2. Function Tool: Creates a report with the computed statistics
"""

model = self.test_agents_params["model_deployment_name"]
@@ -90,25 +101,27 @@ def test_generate_data_and_report(self, **kwargs):
# Define function tool
report_function = FunctionTool(
name="generate_report",
description="Generate a report with the provided data",
description="Generate and save a report with the analysis results. Must be called to create the report.",
parameters={
"type": "object",
"properties": {
"title": {"type": "string", "description": "Report title"},
"summary": {"type": "string", "description": "Report summary"},
"data_count": {"type": "integer", "description": "Number of data points analyzed"},
"average": {"type": "number", "description": "Calculated average value"},
"summary": {"type": "string", "description": "Summary of findings"},
},
"required": ["title", "summary"],
"required": ["title", "data_count", "average", "summary"],
"additionalProperties": False,
},
strict=True,
)

# Create agent
# Create agent with explicit instructions
agent = project_client.agents.create_version(
agent_name="code-func-report-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Generate data using code and create reports with the generate_report function.",
instructions="You are a data analyst. Use code interpreter to generate and analyze data, then ALWAYS create a report using the generate_report function with the exact statistics you computed.",
tools=[
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
report_function,
@@ -118,9 +131,9 @@ def test_generate_data_and_report(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Request data generation and report
# Request data generation and report - use a fixed seed for reproducibility in verification
response = openai_client.responses.create(
input="Generate a list of 10 random numbers between 1 and 100, calculate their average, and create a report.",
input="Using Python with random.seed(42), generate exactly 10 random integers between 1 and 100, calculate their average, and create a report with the results.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)

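
For reference, the seeded sequence the prompt asks for can be reproduced locally, which is presumably how the collapsed assertions pin down the expected statistics. This sketch is not part of the PR and only holds under the assumption that the agent's container runs CPython's default Mersenne Twister and follows the prompt literally (seed 42, then ten `randint(1, 100)` calls):

```python
# Illustrative only - derive the values the agent is expected to report.
import random

random.seed(42)
expected_numbers = [random.randint(1, 100) for _ in range(10)]
expected_average = sum(expected_numbers) / len(expected_numbers)

# A verification step might then check the reported average, for example:
# assert f"{expected_average:.1f}" in response.output_text
```
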
@@ -26,6 +26,10 @@ class TestAgentFileSearchAndCodeInterpreter(TestBase):
def test_find_and_analyze_data(self, **kwargs):
"""
Test finding data with File Search and analyzing with Code Interpreter.

This test verifies that both tools are used:
1. File Search: Agent finds the data file containing numbers
2. Code Interpreter: Agent calculates the average of those numbers
"""

model = self.test_agents_params["model_deployment_name"]
@@ -34,25 +38,49 @@ def test_find_and_analyze_data(self, **kwargs):
project_client = self.create_client(operation_group="agents", **kwargs)
openai_client = project_client.get_openai_client()

# Create data file
txt_content = "Sample data: 10, 20, 30, 40, 50"
# Create data file with numbers that require actual computation
# Numbers: 31, 20, 52, 48, 45, 34, 30, 86, 28, 71, 21, 20, 28, 44, 46
# Sum: 604, Count: 15, Average: 40.266... ≈ 40.27
# This is impossible to calculate mentally - requires Code Interpreter
txt_content = """Sensor Readings Log - Experiment #2847

The following temperature readings (Celsius) were recorded over a 15-hour period:

Hour 1: 31
Hour 2: 20
Hour 3: 52
Hour 4: 48
Hour 5: 45
Hour 6: 34
Hour 7: 30
Hour 8: 86
Hour 9: 28
Hour 10: 71
Hour 11: 21
Hour 12: 20
Hour 13: 28
Hour 14: 44
Hour 15: 46

End of sensor log.
"""
vector_store = openai_client.vector_stores.create(name="DataStore")

txt_file = BytesIO(txt_content.encode("utf-8"))
txt_file.name = "data.txt"
txt_file.name = "sensor_readings.txt"

file = openai_client.vector_stores.files.upload_and_poll(
vector_store_id=vector_store.id,
file=txt_file,
)
print(f"File uploaded (id: {file.id})")

# Create agent
# Create agent with explicit instructions to use both tools
agent = project_client.agents.create_version(
agent_name="file-search-code-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Find data and analyze it.",
instructions="You are a data analyst. Use file search to find data files, then use code interpreter to perform calculations on the data.",
tools=[
FileSearchTool(vector_store_ids=[vector_store.id]),
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
@@ -62,9 +90,9 @@ def test_find_and_analyze_data(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Use the agent
# Request that requires both tools: find data AND calculate
response = openai_client.responses.create(
input="Find the data file and calculate the average.",
input="Find the sensor readings file and use code to calculate the average temperature. Show me the result.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
self.validate_response(response)
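
The expected statistic is easy to pre-compute from the readings embedded in the data file above; a quick check of the 40.27 figure cited in the comment (the exact assertions live in the collapsed part of the diff, so the response check shown is only a hypothetical example):

```python
# Illustrative only - confirm the expected average for the sensor data file.
readings = [31, 20, 52, 48, 45, 34, 30, 86, 28, 71, 21, 20, 28, 44, 46]
average = sum(readings) / len(readings)  # 604 / 15 = 40.266...
assert round(average, 2) == 40.27
# A response check could then look for "40.27" (or "40.3") in response.output_text.
```
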
@@ -79,7 +107,11 @@ def test_find_and_analyze_data(self, **kwargs):
@recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX)
def test_analyze_code_file(self, **kwargs):
"""
Test finding code file and analyzing it.
Test finding code file and running it with Code Interpreter.

This test verifies that both tools are used:
1. File Search: Agent finds the Python code file
2. Code Interpreter: Agent executes the code and returns the computed result
"""

model = self.test_agents_params["model_deployment_name"]
@@ -88,14 +120,18 @@ def test_analyze_code_file(self, **kwargs):
project_client = self.create_client(operation_group="agents", **kwargs)
openai_client = project_client.get_openai_client()

# Create Python code file
python_code = """def fibonacci(n):
# Create Python code file with a function that computes a specific value
# fibonacci(15) = 610 - this is not a commonly memorized value
python_code = """# Fibonacci sequence calculator

def fibonacci(n):
\"\"\"Calculate the nth Fibonacci number recursively.\"\"\"
if n <= 1:
return n
return fibonacci(n-1) + fibonacci(n-2)

result = fibonacci(10)
print(f"Fibonacci(10) = {result}")
# The code needs to be executed to find what fibonacci(15) equals
# This is not a commonly known value - it requires actual computation
"""

vector_store = openai_client.vector_stores.create(name="CodeAnalysisStore")
@@ -109,37 +145,46 @@ def test_analyze_code_file(self, **kwargs):
)
print(f"Code file uploaded (id: {file.id})")

# Create agent
# Create agent with explicit instructions to run code
agent = project_client.agents.create_version(
agent_name="file-search-code-analysis-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Find code files and analyze them. You can run code to test it.",
instructions="You are a code analyst. Use file search to find code files, then use code interpreter to execute and test the code.",
tools=[
FileSearchTool(vector_store_ids=[vector_store.id]),
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
],
),
description="Agent for code analysis.",
description="Agent for code analysis and execution.",
)
print(f"Agent created (id: {agent.id})")

# Request analysis
# Request that requires both tools: find code AND execute it
response = openai_client.responses.create(
input="Find the fibonacci code and explain what it does. What is the computational complexity?",
input="Find the fibonacci code file and run it to calculate fibonacci(15). What is the result?",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)

response_text = response.output_text
print(f"Response: {response_text[:300]}...")
print(f"Response: {response_text[:400]}...")

# Verify response is meaningful
assert len(response_text) > 30, "Expected detailed response"

assert len(response_text) > 50
# Verify File Search was used - response should reference the fibonacci code
response_lower = response_text.lower()
assert any(
keyword in response_lower for keyword in ["fibonacci", "recursive", "complexity", "exponential"]
), "Expected analysis of fibonacci algorithm"
keyword in response_lower for keyword in ["fibonacci", "function", "recursive", "code"]
), f"Expected response to reference the fibonacci code. Got: {response_text[:200]}"

# Verify Code Interpreter executed the code and got the correct result
# fibonacci(15) = 610 - this requires actual execution
assert "610" in response_text, f"Expected fibonacci(15) = 610 in response. Got: {response_text[:300]}"

print("✓ Code file analysis completed")
print("[PASS] File Search + Code Interpreter both verified!")
print(" - File Search: Found the fibonacci code file")
print(" - Code Interpreter: Executed code and computed fibonacci(15) = 610")

# Cleanup
project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
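
As a sanity check on the hard-coded expectation in the assertion above, the recursive definition embedded in the uploaded file does yield 610 for n = 15. A standalone sketch, not part of the PR:

```python
# Illustrative only - confirm the value the test asserts the agent must compute.
def fibonacci(n: int) -> int:
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

assert fibonacci(15) == 610  # 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610
```
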