2 changes: 1 addition & 1 deletion sdk/ai/azure-ai-projects/assets.json
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/ai/azure-ai-projects",
"Tag": "python/ai/azure-ai-projects_febb246e47"
"Tag": "python/ai/azure-ai-projects_314598932e"
}
@@ -12,10 +12,11 @@
All tests use the same tool combination but different inputs and workflows.
"""

import json
from test_base import TestBase, servicePreparer
from devtools_testutils import recorded_by_proxy, RecordedTransport
from azure.ai.projects.models import PromptAgentDefinition, CodeInterpreterTool, CodeInterpreterToolAuto, FunctionTool

from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam

class TestAgentCodeInterpreterAndFunction(TestBase):
"""Tests for agents using Code Interpreter + Function Tool combination."""
@@ -25,6 +26,10 @@ class TestAgentCodeInterpreterAndFunction(TestBase):
def test_calculate_and_save(self, **kwargs):
"""
Test calculation with Code Interpreter and saving with Function Tool.

This test verifies that both tools are used:
1. Code Interpreter: Performs a calculation that requires actual computation
2. Function Tool: Saves the computed result
"""

model = self.test_agents_params["model_deployment_name"]
@@ -36,24 +41,25 @@ def test_calculate_and_save(self, **kwargs):
# Define function tool
func_tool = FunctionTool(
name="save_result",
description="Save analysis result",
description="Save the calculation result. Must be called to persist the result.",
parameters={
"type": "object",
"properties": {
"result": {"type": "string", "description": "The result"},
"calculation": {"type": "string", "description": "Description of the calculation"},
"result": {"type": "string", "description": "The numerical result"},
},
"required": ["result"],
"required": ["calculation", "result"],
"additionalProperties": False,
},
strict=True,
)

# Create agent
# Create agent with explicit instructions to use both tools
agent = project_client.agents.create_version(
agent_name="code-func-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Run calculations and save results.",
instructions="You are a calculator assistant. Use code interpreter to perform calculations, then ALWAYS save the result using the save_result function.",
tools=[
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
func_tool,
@@ -63,9 +69,10 @@ def test_calculate_and_save(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Use the agent
# Request a calculation that requires Code Interpreter (not trivial math)
# 17^4 = 83521 - not something easily computed mentally
response = openai_client.responses.create(
input="Calculate 5 + 3 and save the result.",
input="Calculate 17 to the power of 4 using code, then save the result.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
self.validate_response(response)
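
The visible hunk ends before the function-call round trip, but the newly added imports (`json`, `FunctionCallOutput`) suggest the collapsed portion of this test feeds a stubbed `save_result` output back to the model. The following is an illustrative sketch only, not part of the PR, assuming the first response contains a `function_call` output item (identifier names like `function_call` and `follow_up` are invented here; `response`, `openai_client`, and `agent` come from the test above):

```python
# Illustrative sketch - not part of the diff. Assumes the model emitted a
# `function_call` item for the save_result tool in response.output.
function_call = next(item for item in response.output if item.type == "function_call")
arguments = json.loads(function_call.arguments)  # e.g. {"calculation": "...", "result": "83521"}

# Return a stubbed tool result so the model can produce its final answer.
follow_up = openai_client.responses.create(
    input=[
        FunctionCallOutput(
            type="function_call_output",
            call_id=function_call.call_id,
            output=json.dumps({"status": "saved", "result": arguments["result"]}),
        )
    ],
    previous_response_id=response.id,
    extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
print(follow_up.output_text)
```
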
@@ -79,6 +86,10 @@ def test_generate_data_and_report(self, **kwargs):
def test_generate_data_and_report(self, **kwargs):
"""
Test generating data with Code Interpreter and reporting with Function.

This test verifies that both tools are used:
1. Code Interpreter: Generates random data and calculates statistics
2. Function Tool: Creates a report with the computed statistics
"""

model = self.test_agents_params["model_deployment_name"]
@@ -90,25 +101,27 @@ def test_generate_data_and_report(self, **kwargs):
# Define function tool
report_function = FunctionTool(
name="generate_report",
description="Generate a report with the provided data",
description="Generate and save a report with the analysis results. Must be called to create the report.",
parameters={
"type": "object",
"properties": {
"title": {"type": "string", "description": "Report title"},
"summary": {"type": "string", "description": "Report summary"},
"data_count": {"type": "integer", "description": "Number of data points analyzed"},
"average": {"type": "number", "description": "Calculated average value"},
"summary": {"type": "string", "description": "Summary of findings"},
},
"required": ["title", "summary"],
"required": ["title", "data_count", "average", "summary"],
"additionalProperties": False,
},
strict=True,
)

# Create agent
# Create agent with explicit instructions
agent = project_client.agents.create_version(
agent_name="code-func-report-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Generate data using code and create reports with the generate_report function.",
instructions="You are a data analyst. Use code interpreter to generate and analyze data, then ALWAYS create a report using the generate_report function with the exact statistics you computed.",
tools=[
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
report_function,
@@ -118,9 +131,9 @@ def test_generate_data_and_report(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Request data generation and report
# Request data generation and report - use a fixed seed for reproducibility in verification
response = openai_client.responses.create(
input="Generate a list of 10 random numbers between 1 and 100, calculate their average, and create a report.",
input="Using Python with random.seed(42), generate exactly 10 random integers between 1 and 100, calculate their average, and create a report with the results.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)

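
For reference, the seeded sequence the prompt asks for can be reproduced locally, which is presumably how the collapsed assertions pin down the expected statistics. This sketch is not part of the PR and only holds under the assumption that the agent's container runs CPython's default Mersenne Twister and follows the prompt literally (seed 42, then ten `randint(1, 100)` calls):

```python
# Illustrative only - derive the values the agent is expected to report.
import random

random.seed(42)
expected_numbers = [random.randint(1, 100) for _ in range(10)]
expected_average = sum(expected_numbers) / len(expected_numbers)

# A verification step might then check the reported average, for example:
# assert f"{expected_average:.1f}" in response.output_text
```
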
@@ -26,6 +26,10 @@ class TestAgentFileSearchAndCodeInterpreter(TestBase):
def test_find_and_analyze_data(self, **kwargs):
"""
Test finding data with File Search and analyzing with Code Interpreter.

This test verifies that both tools are used:
1. File Search: Agent finds the data file containing numbers
2. Code Interpreter: Agent calculates the average of those numbers
"""

model = self.test_agents_params["model_deployment_name"]
@@ -34,25 +38,49 @@ def test_find_and_analyze_data(self, **kwargs):
project_client = self.create_client(operation_group="agents", **kwargs)
openai_client = project_client.get_openai_client()

# Create data file
txt_content = "Sample data: 10, 20, 30, 40, 50"
# Create data file with numbers that require actual computation
# Numbers: 31, 20, 52, 48, 45, 34, 30, 86, 28, 71, 21, 20, 28, 44, 46
# Sum: 604, Count: 15, Average: 40.266... ≈ 40.27
# This is impossible to calculate mentally - requires Code Interpreter
txt_content = """Sensor Readings Log - Experiment #2847

The following temperature readings (Celsius) were recorded over a 15-hour period:

Hour 1: 31
Hour 2: 20
Hour 3: 52
Hour 4: 48
Hour 5: 45
Hour 6: 34
Hour 7: 30
Hour 8: 86
Hour 9: 28
Hour 10: 71
Hour 11: 21
Hour 12: 20
Hour 13: 28
Hour 14: 44
Hour 15: 46

End of sensor log.
"""
vector_store = openai_client.vector_stores.create(name="DataStore")

txt_file = BytesIO(txt_content.encode("utf-8"))
txt_file.name = "data.txt"
txt_file.name = "sensor_readings.txt"

file = openai_client.vector_stores.files.upload_and_poll(
vector_store_id=vector_store.id,
file=txt_file,
)
print(f"File uploaded (id: {file.id})")

# Create agent
# Create agent with explicit instructions to use both tools
agent = project_client.agents.create_version(
agent_name="file-search-code-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Find data and analyze it.",
instructions="You are a data analyst. Use file search to find data files, then use code interpreter to perform calculations on the data.",
tools=[
FileSearchTool(vector_store_ids=[vector_store.id]),
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
@@ -62,9 +90,9 @@ def test_find_and_analyze_data(self, **kwargs):
)
print(f"Agent created (id: {agent.id})")

# Use the agent
# Request that requires both tools: find data AND calculate
response = openai_client.responses.create(
input="Find the data file and calculate the average.",
input="Find the sensor readings file and use code to calculate the average temperature. Show me the result.",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)
self.validate_response(response)
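
The expected statistic is easy to pre-compute from the readings embedded in the data file above; a quick check of the 40.27 figure cited in the comment (the exact assertions live in the collapsed part of the diff, so the response check shown is only a hypothetical example):

```python
# Illustrative only - confirm the expected average for the sensor data file.
readings = [31, 20, 52, 48, 45, 34, 30, 86, 28, 71, 21, 20, 28, 44, 46]
average = sum(readings) / len(readings)  # 604 / 15 = 40.266...
assert round(average, 2) == 40.27
# A response check could then look for "40.27" (or "40.3") in response.output_text.
```
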
@@ -79,7 +107,11 @@ def test_find_and_analyze_data(self, **kwargs):
@recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX)
def test_analyze_code_file(self, **kwargs):
"""
Test finding code file and analyzing it.
Test finding code file and running it with Code Interpreter.

This test verifies that both tools are used:
1. File Search: Agent finds the Python code file
2. Code Interpreter: Agent executes the code and returns the computed result
"""

model = self.test_agents_params["model_deployment_name"]
@@ -88,14 +120,18 @@ def test_analyze_code_file(self, **kwargs):
project_client = self.create_client(operation_group="agents", **kwargs)
openai_client = project_client.get_openai_client()

# Create Python code file
python_code = """def fibonacci(n):
# Create Python code file with a function that computes a specific value
# fibonacci(15) = 610 - this is not a commonly memorized value
python_code = """# Fibonacci sequence calculator

def fibonacci(n):
\"\"\"Calculate the nth Fibonacci number recursively.\"\"\"
if n <= 1:
return n
return fibonacci(n-1) + fibonacci(n-2)

result = fibonacci(10)
print(f"Fibonacci(10) = {result}")
# The code needs to be executed to find what fibonacci(15) equals
# This is not a commonly known value - it requires actual computation
"""

vector_store = openai_client.vector_stores.create(name="CodeAnalysisStore")
@@ -109,37 +145,46 @@ def test_analyze_code_file(self, **kwargs):
)
print(f"Code file uploaded (id: {file.id})")

# Create agent
# Create agent with explicit instructions to run code
agent = project_client.agents.create_version(
agent_name="file-search-code-analysis-agent",
definition=PromptAgentDefinition(
model=model,
instructions="Find code files and analyze them. You can run code to test it.",
instructions="You are a code analyst. Use file search to find code files, then use code interpreter to execute and test the code.",
tools=[
FileSearchTool(vector_store_ids=[vector_store.id]),
CodeInterpreterTool(container=CodeInterpreterToolAuto()),
],
),
description="Agent for code analysis.",
description="Agent for code analysis and execution.",
)
print(f"Agent created (id: {agent.id})")

# Request analysis
# Request that requires both tools: find code AND execute it
response = openai_client.responses.create(
input="Find the fibonacci code and explain what it does. What is the computational complexity?",
input="Find the fibonacci code file and run it to calculate fibonacci(15). What is the result?",
extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
)

response_text = response.output_text
print(f"Response: {response_text[:300]}...")
print(f"Response: {response_text[:400]}...")

# Verify response is meaningful
assert len(response_text) > 30, "Expected detailed response"

assert len(response_text) > 50
# Verify File Search was used - response should reference the fibonacci code
response_lower = response_text.lower()
assert any(
keyword in response_lower for keyword in ["fibonacci", "recursive", "complexity", "exponential"]
), "Expected analysis of fibonacci algorithm"
keyword in response_lower for keyword in ["fibonacci", "function", "recursive", "code"]
), f"Expected response to reference the fibonacci code. Got: {response_text[:200]}"

# Verify Code Interpreter executed the code and got the correct result
# fibonacci(15) = 610 - this requires actual execution
assert "610" in response_text, f"Expected fibonacci(15) = 610 in response. Got: {response_text[:300]}"

print("✓ Code file analysis completed")
print("[PASS] File Search + Code Interpreter both verified!")
print(" - File Search: Found the fibonacci code file")
print(" - Code Interpreter: Executed code and computed fibonacci(15) = 610")

# Cleanup
project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
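
As a sanity check on the hard-coded expectation in the assertion above, the recursive definition embedded in the uploaded file does yield 610 for n = 15. A standalone sketch, not part of the PR:

```python
# Illustrative only - confirm the value the test asserts the agent must compute.
def fibonacci(n: int) -> int:
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

assert fibonacci(15) == 610  # 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610
```
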