From 8f8155b4a566727b2e154c19a2483c9e209bfaa8 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Thu, 4 Dec 2025 16:56:03 -0700 Subject: [PATCH 01/36] add multiagent --- .gitignore | 3 +- justfile | 3 + src/ursa/experimental/agents/multiagent.py | 128 +++++++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 src/ursa/experimental/agents/multiagent.py diff --git a/.gitignore b/.gitignore index 696f49eb..c43b5c40 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,5 @@ arxiv_papers *.sqfs ursa_workspace/ .vscode/settings.json -scratch/ \ No newline at end of file +scratch/ +ursa-workspace/ diff --git a/justfile b/justfile index ead5fb84..5a7a418e 100644 --- a/justfile +++ b/justfile @@ -107,3 +107,6 @@ shell: pygrep pattern: conda run --live-stream -n base watch \ grep --exclude-dir=__pycache__ --exclude-dir=.venv -r '{{ pattern }}' + +python: + uv run ipython --no-autoindent diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py new file mode 100644 index 00000000..fa521dda --- /dev/null +++ b/src/ursa/experimental/agents/multiagent.py @@ -0,0 +1,128 @@ +from pathlib import Path +from typing import Optional + +from langchain.agents import create_agent +from langchain.chat_models import BaseChatModel +from langchain.messages import HumanMessage +from langchain.tools import tool +from langgraph.checkpoint.base import BaseCheckpointSaver + +from ursa.agents import ExecutionAgent, PlanningAgent +from ursa.util import Checkpointer + +system_prompt = """\ +You are an agent with multiple subagents and tools. + +These agents are available to you: + +* execution_agent + * Use this agent whenever you are asked to write/edit code or run arbitrary + commands from the command line. + +* planning_agent + * Use this agent whenever you are asked to plan out tasks. 
+ +Note that if the user asks you to plan and then execute a task, you are +to iterate through each part (step or bullet point) of a task and then +carry out the execution agent. Here is an example query: + +Please make a plan to print the first 10 natural numbers in python, then execute +the code. + +For this query, a generated plan might look like this: + +``` +The user wants to compute the first 10 natural numbers in python. This is the plan. + +* step 1: write code +* step 2: check that code is correct +``` + +In this case you should call the execution agent for step 1; and then call the +execution agent for step 2. If more steps are in the plan, keep calling the +execution agent. +""" + + +# NOTE: Is the solution to have a tool that breaks up the string plan, and then +# execute each section of the plan? +@tool +def execute_plan(plan: str): + """Execute plan item by item.""" + ... + + +class Ursa: + def __init__( + self, + llm: BaseChatModel, + extra_tools: list = [], + workspace: Path = Path("ursa-workspace"), + checkpointer: Optional[BaseCheckpointSaver] = None, + thread_id: str = "ursa", + max_reflection_steps: int = 1, + system_prompt: str = system_prompt, + ): + self.llm = llm + self.extra_tools = extra_tools + self.workspace = workspace + self.checkpointer = checkpointer + self.thread_id = thread_id + self.system_prompt = system_prompt + self.max_reflection_steps = max_reflection_steps + self.checkpointer = checkpointer or Checkpointer.from_workspace( + workspace + ) + + def make_planning_tool(self): + planning_agent = PlanningAgent( + self.llm, + max_reflection_steps=self.max_reflection_steps, + thread_id=self.thread_id, + ) + + @tool( + "planning_agent", + description="Create plans for arbitrary tasks", + ) + def call_agent(query: str): + result = planning_agent.invoke({ + "messages": [HumanMessage(query)], + }) + return result["messages"][-1].content + + return call_agent + + def make_execution_tool(self): + execution_agent = ExecutionAgent(self.llm, 
thread_id=self.thread_id) + + @tool( + "execution_agent", + description="Read and edit scripts/code, and execute arbitrary commands on command line.", + ) + def call_agent(query: str): + result = execution_agent.invoke({ + "messages": [HumanMessage(query)], + "workspace": str(self.workspace), + }) + return result["messages"][-1].content + + return call_agent + + def create(self, **kwargs): + """Create agent. + + kwargs: for `create_agent` + """ + self.subagents = [ + self.make_execution_tool(), + self.make_planning_tool(), + ] + self.tools = self.subagents + self.extra_tools + return create_agent( + self.llm, + tools=self.tools, + system_prompt=self.system_prompt, + checkpointer=self.checkpointer, + **kwargs, + ) From f3856dfe12a8927c79b0a613adbb117e88190e12 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 5 Dec 2025 16:49:50 -0700 Subject: [PATCH 02/36] plan_execute_tool --- dev/.gitignore | 1 + dev/test.py | 87 +++++++++++ justfile | 1 + src/ursa/agents/execution_agent.py | 3 +- src/ursa/experimental/agents/deep.py | 42 ++++++ src/ursa/experimental/agents/multiagent.py | 165 ++++++++++++--------- 6 files changed, 232 insertions(+), 67 deletions(-) create mode 100644 dev/.gitignore create mode 100644 dev/test.py create mode 100644 src/ursa/experimental/agents/deep.py diff --git a/dev/.gitignore b/dev/.gitignore new file mode 100644 index 00000000..0e51ad35 --- /dev/null +++ b/dev/.gitignore @@ -0,0 +1 @@ +ursa-workspace/ diff --git a/dev/test.py b/dev/test.py new file mode 100644 index 00000000..008bb531 --- /dev/null +++ b/dev/test.py @@ -0,0 +1,87 @@ +import os + +import httpx +from langchain.chat_models import init_chat_model +from langchain.messages import HumanMessage +from langchain_openai import ChatOpenAI +from langgraph.checkpoint.memory import InMemorySaver +from pydantic import SecretStr + +from ursa.experimental.agents.multiagent import Ursa + +aiportal = False + +if aiportal: + llm = ChatOpenAI( + model=os.environ["CLAUDE"], + # 
model="gpt-oss-120b", + base_url=os.environ["AIPORTAL_API_URL"], + api_key=SecretStr(os.environ["AIPORTAL_API_KEY"]), + http_client=httpx.Client(verify=False), + ) +else: + # llm = init_chat_model("ollama:ministral-3:14b") + llm = init_chat_model("openai:gpt-5-nano") + + +agent = Ursa( + llm, + max_reflection_steps=0, + checkpointer=InMemorySaver(), +).create() + +results = [] + + +def run(query: str): + print(f"Task:\n{query}") + results.append( + result := agent.invoke( + {"messages": [HumanMessage(query)]}, + { + "configurable": { + "thread_id": "ursa", + }, + "recursion_limit": 50, + }, + ) + ) + return result + + +# run( +# "Write and execute a very minimal python script to compute Pi using Monte Carlo." +# ) + +# run("What did you just do?") + +# print(results) + + +# run( +# "Write a plan to write a very minimal python script to compute Pi using Monte Carlo." +# "After planning, please execute the plan step by step. Save any code to disk." +# ) + +# run("Can you now execute the plan?") + +query = """ +I have a file `data/data.csv`. + +**First**, read the first few lines of the file to understand the format. +Do this quickly; don't go overboard. + +**Then**, write a plan (with at most 3 steps) to perform simple linear +regression on this data in python. The linear regression must have a slope and +intercept. The plan MUST NOT include code; though it may include instruction +to write code. The analysis should be **very minimal** and AS CONCISE AS +POSSIBLE. + +**Finally**, EXECUTE THE PLAN using execute_plan_tool. Write any code to +*`output/`. DO NOT write anything to `data/`. 
+""" +run(query) + +for result in results: + for msg in result["messages"]: + msg.pretty_print() diff --git a/justfile b/justfile index 5a7a418e..0740f98c 100644 --- a/justfile +++ b/justfile @@ -108,5 +108,6 @@ pygrep pattern: conda run --live-stream -n base watch \ grep --exclude-dir=__pycache__ --exclude-dir=.venv -r '{{ pattern }}' +[no-cd] python: uv run ipython --no-autoindent diff --git a/src/ursa/agents/execution_agent.py b/src/ursa/agents/execution_agent.py index 6260c5f3..e46509f2 100644 --- a/src/ursa/agents/execution_agent.py +++ b/src/ursa/agents/execution_agent.py @@ -48,6 +48,7 @@ ToolMessage, ) from langchain_core.tools import StructuredTool +from langchain_core.tools.base import BaseTool from langchain_mcp_adapters.client import MultiServerMCPClient from langgraph.graph import StateGraph from langgraph.graph.message import add_messages @@ -221,7 +222,7 @@ def __init__( llm: BaseChatModel, agent_memory: Optional[Any | AgentMemory] = None, log_state: bool = False, - extra_tools: Optional[list[Callable[..., Any]]] = None, + extra_tools: Optional[list[BaseTool]] = None, tokens_before_summarize: int = 50000, messages_to_keep: int = 20, safe_codes: Optional[list[str]] = None, diff --git a/src/ursa/experimental/agents/deep.py b/src/ursa/experimental/agents/deep.py new file mode 100644 index 00000000..372799cd --- /dev/null +++ b/src/ursa/experimental/agents/deep.py @@ -0,0 +1,42 @@ +from deepagents import CompiledSubAgent, create_deep_agent +from langchain.chat_models import init_chat_model +from langchain.messages import HumanMessage + +from ursa.agents import ExecutionAgent + +llm = init_chat_model("openai:gpt-5-nano") +exe_graph = ExecutionAgent(llm=llm)._action + +# # Create a custom agent graph +# custom_graph = create_agent( +# model=exe_graph, +# # tools=specialized_tools, +# system_prompt="You are a specialized agent for data analysis...", +# ) + +# Use it as a custom subagent +exe_subagent = CompiledSubAgent( + name="executor", + 
description="Specialized agent for writing/executing code", + runnable=exe_graph, +) + +subagents = [exe_subagent] + +agent = create_deep_agent( + model=llm, + # tools=[internet_search], + system_prompt="You are a data scientist. When asked to write code, use the executor agent.", + subagents=[exe_subagent], +) + + +results = [] + + +def run(query: str): + results.append(result := agent.invoke({"messages": [HumanMessage(query)]})) + return result + + +run("Write a very minimal python script to compute Pi using Monte Carlo.") diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index fa521dda..03f5b55b 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Optional +import yaml from langchain.agents import create_agent from langchain.chat_models import BaseChatModel from langchain.messages import HumanMessage @@ -11,45 +12,109 @@ from ursa.util import Checkpointer system_prompt = """\ -You are an agent with multiple subagents and tools. +You are an data scientist with multiple tools. -These agents are available to you: - -* execution_agent - * Use this agent whenever you are asked to write/edit code or run arbitrary - commands from the command line. +These tools are available to you: * planning_agent - * Use this agent whenever you are asked to plan out tasks. - -Note that if the user asks you to plan and then execute a task, you are -to iterate through each part (step or bullet point) of a task and then -carry out the execution agent. Here is an example query: - -Please make a plan to print the first 10 natural numbers in python, then execute -the code. + * Use this tool whenever you are asked to plan out tasks. + * In each step of your plan, if code needs to be generated, please + explicitly state in the step that code needs to be written and executed. 
-For this query, a generated plan might look like this: - -``` -The user wants to compute the first 10 natural numbers in python. This is the plan. - -* step 1: write code -* step 2: check that code is correct -``` +* execution_agent + * Use this tool **whenever** you are asked to write/edit code or run arbitrary + commands from the command line. -In this case you should call the execution agent for step 1; and then call the -execution agent for step 2. If more steps are in the plan, keep calling the -execution agent. +* execute_plan_tool + * Use this tool if you are asked to execute a plan that starts with and ends with . + * Do not use this tool if the tags are not present in the instruction! """ # NOTE: Is the solution to have a tool that breaks up the string plan, and then # execute each section of the plan? -@tool -def execute_plan(plan: str): - """Execute plan item by item.""" - ... +def make_execute_plan_tool(llm: BaseChatModel, workspace: Path): + execution_agent = ExecutionAgent(llm)._action + + @tool( + "execute_plan_tool", + description="Execute a plan from the planning agent tool.", + ) + def execute_plan(plan: str): + """Execute plan item by item.""" + + print("EXECUTING PLAN") + if plan.startswith("") and plan.endswith(""): + summaries = [] + + plan_steps = yaml.safe_load( + plan.replace("", "").replace("", "").strip() + ) + for step in plan_steps: + step_prompt = "You are contributing to a larger solution.\n\n" + if len(summaries) > 0: + last_step_summary = summaries[-1] + step_prompt += ( + f"Previous-step summary: {last_step_summary}\n\n" + ) + step_prompt += f"Now, execute the following step (and if you write any code, be sure to execute the code to make sure it works):\n{yaml.dump(step)}" + print(step_prompt) + + result = execution_agent.invoke({ + "messages": [HumanMessage(step_prompt)], + "workspace": str(workspace), + }) + last_step_summary = result["messages"][-1].content + summaries.append(last_step_summary) + return "Grand summary of plan 
execution:\n\n" + "\n\n".join( + summaries + ) + else: + return ( + "Could not use `execute_plan` tool execute plan " + "as plan does not start/end with /." + ) + + return execute_plan + + +def make_planning_tool(llm: BaseChatModel, max_reflection_steps: int): + planning_agent = PlanningAgent( + llm, max_reflection_steps=max_reflection_steps + )._action + + @tool( + "planning_agent", + description="Create plans for arbitrary tasks", + ) + def call_agent(query: str): + result = planning_agent.invoke({ + "messages": [HumanMessage(query)], + "reflection_steps": max_reflection_steps, + }) + # return result["messages"][-1].content + plan = f"\n{yaml.dump(result['plan_steps'])}\n" + print(plan) + return plan + + return call_agent + + +def make_execution_tool(llm: BaseChatModel, workspace: Path): + execution_agent = ExecutionAgent(llm)._action + + @tool( + "execution_agent", + description="Read and edit scripts/code, and execute arbitrary commands on command line.", + ) + def call_agent(query: str): + result = execution_agent.invoke({ + "messages": [HumanMessage(query)], + "workspace": str(workspace), + }) + return result["messages"][-1].content + + return call_agent class Ursa: @@ -74,49 +139,17 @@ def __init__( workspace ) - def make_planning_tool(self): - planning_agent = PlanningAgent( - self.llm, - max_reflection_steps=self.max_reflection_steps, - thread_id=self.thread_id, - ) - - @tool( - "planning_agent", - description="Create plans for arbitrary tasks", - ) - def call_agent(query: str): - result = planning_agent.invoke({ - "messages": [HumanMessage(query)], - }) - return result["messages"][-1].content - - return call_agent - - def make_execution_tool(self): - execution_agent = ExecutionAgent(self.llm, thread_id=self.thread_id) - - @tool( - "execution_agent", - description="Read and edit scripts/code, and execute arbitrary commands on command line.", - ) - def call_agent(query: str): - result = execution_agent.invoke({ - "messages": [HumanMessage(query)], - 
"workspace": str(self.workspace), - }) - return result["messages"][-1].content - - return call_agent - def create(self, **kwargs): """Create agent. kwargs: for `create_agent` """ self.subagents = [ - self.make_execution_tool(), - self.make_planning_tool(), + make_execution_tool(llm=self.llm, workspace=self.workspace), + make_planning_tool( + llm=self.llm, max_reflection_steps=self.max_reflection_steps + ), + make_execute_plan_tool(llm=self.llm, workspace=self.workspace), ] self.tools = self.subagents + self.extra_tools return create_agent( From 066cedc023e683877cae7605eb69b394e5de0a08 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 5 Dec 2025 17:02:45 -0700 Subject: [PATCH 03/36] yes --- dev/test.py | 5 +++-- src/ursa/experimental/agents/multiagent.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dev/test.py b/dev/test.py index 008bb531..84bee474 100644 --- a/dev/test.py +++ b/dev/test.py @@ -77,8 +77,9 @@ def run(query: str): to write code. The analysis should be **very minimal** and AS CONCISE AS POSSIBLE. -**Finally**, EXECUTE THE PLAN using execute_plan_tool. Write any code to -*`output/`. DO NOT write anything to `data/`. +**Finally**, EXECUTE THE PLAN using execute_plan_tool. Write all code to +*`output/analysis.py`. DO NOT write anything to `data/`. Do not write any other +*files. I want a single file with the entire analysis. 
""" run(query) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 03f5b55b..ab99dd77 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -1,7 +1,7 @@ +import json from pathlib import Path from typing import Optional -import yaml from langchain.agents import create_agent from langchain.chat_models import BaseChatModel from langchain.messages import HumanMessage @@ -47,7 +47,7 @@ def execute_plan(plan: str): if plan.startswith("") and plan.endswith(""): summaries = [] - plan_steps = yaml.safe_load( + plan_steps = json.loads( plan.replace("", "").replace("", "").strip() ) for step in plan_steps: @@ -57,7 +57,7 @@ def execute_plan(plan: str): step_prompt += ( f"Previous-step summary: {last_step_summary}\n\n" ) - step_prompt += f"Now, execute the following step (and if you write any code, be sure to execute the code to make sure it works):\n{yaml.dump(step)}" + step_prompt += f"Now, execute the following step (and if you write any code, be sure to execute the code to make sure it works):\n{json.dumps(step)}" print(step_prompt) result = execution_agent.invoke({ @@ -93,7 +93,7 @@ def call_agent(query: str): "reflection_steps": max_reflection_steps, }) # return result["messages"][-1].content - plan = f"\n{yaml.dump(result['plan_steps'])}\n" + plan = f"\n{json.dumps(result['plan_steps'])}\n" print(plan) return plan From 9b346942350b40d103b1d8b4986724b12c028c58 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 5 Dec 2025 17:17:05 -0700 Subject: [PATCH 04/36] yes --- dev/test.py | 4 ++-- src/ursa/experimental/agents/multiagent.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/dev/test.py b/dev/test.py index 84bee474..7fbb6954 100644 --- a/dev/test.py +++ b/dev/test.py @@ -78,8 +78,8 @@ def run(query: str): POSSIBLE. **Finally**, EXECUTE THE PLAN using execute_plan_tool. Write all code to -*`output/analysis.py`. 
DO NOT write anything to `data/`. Do not write any other -*files. I want a single file with the entire analysis. +*`analysis.py`. DO NOT write anything to `data/`. Do not write any other +files. I want a single file with the entire analysis. """ run(query) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index ab99dd77..bb29f715 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -47,11 +47,15 @@ def execute_plan(plan: str): if plan.startswith("") and plan.endswith(""): summaries = [] - plan_steps = json.loads( + task_and_plan_steps = json.loads( plan.replace("", "").replace("", "").strip() ) + task = task_and_plan_steps[0]["task"] + plan_steps = task_and_plan_steps[1:] for step in plan_steps: - step_prompt = "You are contributing to a larger solution.\n\n" + step_prompt = ( + f"You are contributing to a larger solution:\n{task}.\n\n" + ) if len(summaries) > 0: last_step_summary = summaries[-1] step_prompt += ( @@ -93,7 +97,8 @@ def call_agent(query: str): "reflection_steps": max_reflection_steps, }) # return result["messages"][-1].content - plan = f"\n{json.dumps(result['plan_steps'])}\n" + plan_steps = [{"task": query}] + result["plan_steps"] + plan = f"\n{json.dumps(plan_steps)}\n" print(plan) return plan From fb2d2e6322851e17c1177d300e1242cce26c2005 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Sun, 7 Dec 2025 12:20:05 -0700 Subject: [PATCH 05/36] yes --- dev/test.py | 2 +- src/ursa/experimental/agents/multiagent.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/test.py b/dev/test.py index 7fbb6954..f4a9807d 100644 --- a/dev/test.py +++ b/dev/test.py @@ -9,7 +9,7 @@ from ursa.experimental.agents.multiagent import Ursa -aiportal = False +aiportal = True if aiportal: llm = ChatOpenAI( diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index bb29f715..7e0cfe05 100644 --- 
a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -28,6 +28,10 @@ * execute_plan_tool * Use this tool if you are asked to execute a plan that starts with and ends with . * Do not use this tool if the tags are not present in the instruction! + +Note that this project is managed by `uv. So, if you need to execute python +code, you MUST run `uv run path/to/file.py`. +DO NOT run `python /path/to/file.py` or `python3 /path/to/file.py`. """ From 659eeea898991d04d078fa18fc66da0133fca67f Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 8 Dec 2025 12:42:52 -0700 Subject: [PATCH 06/36] improve demo --- dev/.gitignore | 1 - dev/dev-workspace/.gitignore | 2 ++ dev/test.py | 4 ++- src/ursa/experimental/agents/multiagent.py | 29 ++++++++++++++-------- 4 files changed, 23 insertions(+), 13 deletions(-) delete mode 100644 dev/.gitignore create mode 100644 dev/dev-workspace/.gitignore diff --git a/dev/.gitignore b/dev/.gitignore deleted file mode 100644 index 0e51ad35..00000000 --- a/dev/.gitignore +++ /dev/null @@ -1 +0,0 @@ -ursa-workspace/ diff --git a/dev/dev-workspace/.gitignore b/dev/dev-workspace/.gitignore new file mode 100644 index 00000000..352cdd3d --- /dev/null +++ b/dev/dev-workspace/.gitignore @@ -0,0 +1,2 @@ +* +!data.csv diff --git a/dev/test.py b/dev/test.py index f4a9807d..6c668f3f 100644 --- a/dev/test.py +++ b/dev/test.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import httpx from langchain.chat_models import init_chat_model @@ -9,7 +10,7 @@ from ursa.experimental.agents.multiagent import Ursa -aiportal = True +aiportal = False if aiportal: llm = ChatOpenAI( @@ -27,6 +28,7 @@ agent = Ursa( llm, max_reflection_steps=0, + workspace=Path("dev-workspace"), checkpointer=InMemorySaver(), ).create() diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 7e0cfe05..6eec769f 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ 
b/src/ursa/experimental/agents/multiagent.py @@ -2,6 +2,7 @@ from pathlib import Path from typing import Optional +import yaml from langchain.agents import create_agent from langchain.chat_models import BaseChatModel from langchain.messages import HumanMessage @@ -12,26 +13,28 @@ from ursa.util import Checkpointer system_prompt = """\ -You are an data scientist with multiple tools. +You are a data scientist with multiple tools. These tools are available to you: * planning_agent * Use this tool whenever you are asked to plan out tasks. - * In each step of your plan, if code needs to be generated, please - explicitly state in the step that code needs to be written and executed. + * In each step of your plan, if code needs to be generated, please explicitly + state in the step that code needs to be written and executed. * execution_agent - * Use this tool **whenever** you are asked to write/edit code or run arbitrary - commands from the command line. + * Use this tool **whenever** you are asked to write/edit code or run + arbitrary commands from the command line. * execute_plan_tool - * Use this tool if you are asked to execute a plan that starts with and ends with . - * Do not use this tool if the tags are not present in the instruction! + * Use this tool if you are asked to execute a plan that starts with + and ends with . + * Do not use this tool if the tags are not present in the + instruction! Note that this project is managed by `uv. So, if you need to execute python -code, you MUST run `uv run path/to/file.py`. -DO NOT run `python /path/to/file.py` or `python3 /path/to/file.py`. +code, you MUST run `uv run path/to/file.py`. DO NOT run `python +/path/to/file.py` or `python3 /path/to/file.py`. 
""" @@ -58,14 +61,18 @@ def execute_plan(plan: str): plan_steps = task_and_plan_steps[1:] for step in plan_steps: step_prompt = ( - f"You are contributing to a larger solution:\n{task}.\n\n" + f"You are contributing to a larger solution:\n{task}\n\n" ) if len(summaries) > 0: last_step_summary = summaries[-1] step_prompt += ( f"Previous-step summary: {last_step_summary}\n\n" ) - step_prompt += f"Now, execute the following step (and if you write any code, be sure to execute the code to make sure it works):\n{json.dumps(step)}" + step_prompt += ( + "Now, execute the following step (and if you write any " + "code, be sure to execute the code to make sure it " + f"works):\n{yaml.dump(step)}" + ) print(step_prompt) result = execution_agent.invoke({ From 4173c11001b3f9a7278c7c7276b04382f2cdfb57 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 8 Dec 2025 12:43:19 -0700 Subject: [PATCH 07/36] yes --- dev/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/test.py b/dev/test.py index 6c668f3f..f98cf29c 100644 --- a/dev/test.py +++ b/dev/test.py @@ -67,6 +67,7 @@ def run(query: str): # run("Can you now execute the plan?") +# TODO: Need to make `uv run` a SAFE command. query = """ I have a file `data/data.csv`. 
From aa68c1a28a339785cd59d57b6bd0ae7deb7c4b1b Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 8 Dec 2025 13:01:23 -0700 Subject: [PATCH 08/36] yes --- dev/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/test.py b/dev/test.py index f98cf29c..866e6f12 100644 --- a/dev/test.py +++ b/dev/test.py @@ -22,7 +22,7 @@ ) else: # llm = init_chat_model("ollama:ministral-3:14b") - llm = init_chat_model("openai:gpt-5-nano") + llm = init_chat_model("openai:gpt-5") agent = Ursa( From 983acba3d82308db44635862b158b28a96fcc96e Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 8 Dec 2025 13:06:06 -0700 Subject: [PATCH 09/36] yes --- dev/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/test.py b/dev/test.py index 866e6f12..a27d8c4d 100644 --- a/dev/test.py +++ b/dev/test.py @@ -80,9 +80,11 @@ def run(query: str): to write code. The analysis should be **very minimal** and AS CONCISE AS POSSIBLE. -**Finally**, EXECUTE THE PLAN using execute_plan_tool. Write all code to +**Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to *`analysis.py`. DO NOT write anything to `data/`. Do not write any other files. I want a single file with the entire analysis. + +**Finally**, Edit *`analysis.py` to make it AS CONCISE AS POSSIBLE. """ run(query) From fe394d5b71ad627bcf67fdf68b65097b19917dbc Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 8 Dec 2025 13:06:40 -0700 Subject: [PATCH 10/36] yes --- dev/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/test.py b/dev/test.py index a27d8c4d..246c75e2 100644 --- a/dev/test.py +++ b/dev/test.py @@ -84,7 +84,7 @@ def run(query: str): *`analysis.py`. DO NOT write anything to `data/`. Do not write any other files. I want a single file with the entire analysis. -**Finally**, Edit *`analysis.py` to make it AS CONCISE AS POSSIBLE. +**Finally**, edit *`analysis.py` to make it AS CONCISE AS POSSIBLE. 
""" run(query) From 598309f0155b2a3e64709d8174063e7c50a14e00 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Tue, 9 Dec 2025 11:37:29 -0700 Subject: [PATCH 11/36] add todo for input/output control between agents --- src/ursa/experimental/agents/multiagent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 6eec769f..b463c67a 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -40,6 +40,8 @@ # NOTE: Is the solution to have a tool that breaks up the string plan, and then # execute each section of the plan? +# TODO: Try doing this instead: +# https://docs.langchain.com/oss/python/langchain/multi-agent#where-to-customize def make_execute_plan_tool(llm: BaseChatModel, workspace: Path): execution_agent = ExecutionAgent(llm)._action From 79fc6cc58d39b82296298bbda27b52f287fe209d Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Tue, 9 Dec 2025 12:33:20 -0700 Subject: [PATCH 12/36] yes --- dev/dev-workspace/.gitignore | 2 -- dev/justfile | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) delete mode 100644 dev/dev-workspace/.gitignore create mode 100644 dev/justfile diff --git a/dev/dev-workspace/.gitignore b/dev/dev-workspace/.gitignore deleted file mode 100644 index 352cdd3d..00000000 --- a/dev/dev-workspace/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!data.csv diff --git a/dev/justfile b/dev/justfile new file mode 100644 index 00000000..f64b8998 --- /dev/null +++ b/dev/justfile @@ -0,0 +1,6 @@ +help: + just -l -u + +clean: + rm -rf dev-workspace/__pycache__ dev-workspace/results + find dev-workspace -mindepth 1 -not -name data.csv -exec rm -f {} + From 5beaa26c68df0e120c3b0edb5c26054357833de1 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Tue, 9 Dec 2025 12:45:56 -0700 Subject: [PATCH 13/36] yes --- dev/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/test.py b/dev/test.py index 
246c75e2..63c9a170 100644 --- a/dev/test.py +++ b/dev/test.py @@ -74,10 +74,10 @@ def run(query: str): **First**, read the first few lines of the file to understand the format. Do this quickly; don't go overboard. -**Then**, write a plan (with at most 3 steps) to perform simple linear +**Then**, write a plan (with at most 4 steps) to perform simple linear regression on this data in python. The linear regression must have a slope and intercept. The plan MUST NOT include code; though it may include instruction -to write code. The analysis should be **very minimal** and AS CONCISE AS +to write code. The analysis should be **very minimal** and AS CONCISE AS POSSIBLE. **Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to From a11869580cefcc8d0d187e8c696d4c1ba55dac63 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 12 Dec 2025 10:01:08 -0700 Subject: [PATCH 14/36] yes --- dev/.gitignore | 1 + dev/justfile | 6 ++++++ dev/{test.py => run.py} | 17 ++++++++++------- src/ursa/experimental/agents/multiagent.py | 22 ++++++++++++++-------- 4 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 dev/.gitignore rename dev/{test.py => run.py} (75%) diff --git a/dev/.gitignore b/dev/.gitignore new file mode 100644 index 00000000..76fa7ff5 --- /dev/null +++ b/dev/.gitignore @@ -0,0 +1 @@ +dev-workspace diff --git a/dev/justfile b/dev/justfile index f64b8998..b1442c8f 100644 --- a/dev/justfile +++ b/dev/justfile @@ -4,3 +4,9 @@ help: clean: rm -rf dev-workspace/__pycache__ dev-workspace/results find dev-workspace -mindepth 1 -not -name data.csv -exec rm -f {} + + +run: + uv run run.py + +test: + cd dev-workspace && uv run analysis.py diff --git a/dev/test.py b/dev/run.py similarity index 75% rename from dev/test.py rename to dev/run.py index 63c9a170..58f954b9 100644 --- a/dev/test.py +++ b/dev/run.py @@ -22,7 +22,7 @@ ) else: # llm = init_chat_model("ollama:ministral-3:14b") - llm = init_chat_model("openai:gpt-5") + llm = 
init_chat_model("openai:gpt-5.2") agent = Ursa( @@ -75,16 +75,19 @@ def run(query: str): Do this quickly; don't go overboard. **Then**, write a plan (with at most 4 steps) to perform simple linear -regression on this data in python. The linear regression must have a slope and -intercept. The plan MUST NOT include code; though it may include instruction -to write code. The analysis should be **very minimal** and AS CONCISE AS -POSSIBLE. +regression on this data in python. I care only about the coefficients. Do not +provide other information or plots. The plan MUST NOT include code; though it +may include instruction to write code. The analysis should be **very minimal** +and AS CONCISE AS POSSIBLE. **Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to -*`analysis.py`. DO NOT write anything to `data/`. Do not write any other +`analysis.py`. DO NOT write anything to `data/`. Do not write any other files. I want a single file with the entire analysis. -**Finally**, edit *`analysis.py` to make it AS CONCISE AS POSSIBLE. +**Finally**, edit `analysis.py` to make it AS CONCISE AS POSSIBLE. Don't +include code for assert, plots, etc. I want ONLY a very minimal script that +reads the data and then prints the linear model's coefficients. Remember, I +want A SINGLE FILE with the entire analysis (in `analysis.py`). """ run(query) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index b463c67a..0a753980 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -38,6 +38,10 @@ """ +def tag(tag_name: str, content: str): + return f"\n<{tag_name}>\n{content}\n\n\n" + + # NOTE: Is the solution to have a tool that breaks up the string plan, and then # execute each section of the plan? 
# TODO: Try doing this instead: @@ -63,18 +67,20 @@ def execute_plan(plan: str): plan_steps = task_and_plan_steps[1:] for step in plan_steps: step_prompt = ( - f"You are contributing to a larger solution:\n{task}\n\n" + "You are contributing a solution to the following overall plan. " + "The overall plan, last step's summary, and next step are as follows." + "With this information, please carry out the next step. " + "IF you write any code, be sure to execute the code to make " + "sure it properly runs." ) + step_prompt += tag("OVERALL_PLAN", task) if len(summaries) > 0: last_step_summary = summaries[-1] - step_prompt += ( - f"Previous-step summary: {last_step_summary}\n\n" + step_prompt += tag( + "SUMMARY_OF_LAST_STEP", last_step_summary ) - step_prompt += ( - "Now, execute the following step (and if you write any " - "code, be sure to execute the code to make sure it " - f"works):\n{yaml.dump(step)}" - ) + + step_prompt += tag("NEXT_STEP", yaml.dump(step)) print(step_prompt) result = execution_agent.invoke({ From 45185795de764b72dd68a09a920ba9f2947f2f62 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 12 Dec 2025 11:39:58 -0700 Subject: [PATCH 15/36] format --- src/ursa/experimental/agents/multiagent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 0a753980..20196053 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -80,7 +80,7 @@ def execute_plan(plan: str): "SUMMARY_OF_LAST_STEP", last_step_summary ) - step_prompt += tag("NEXT_STEP", yaml.dump(step)) + step_prompt += tag("NEXT_STEP", yaml.dump(step).strip()) print(step_prompt) result = execution_agent.invoke({ @@ -118,7 +118,7 @@ def call_agent(query: str): # return result["messages"][-1].content plan_steps = [{"task": query}] + result["plan_steps"] plan = f"\n{json.dumps(plan_steps)}\n" - print(plan) + print(json.dumps(plan_steps, 
indent=4)) return plan return call_agent From 0500290d3499ec865eb8ae61236b324c981ada6d Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 12 Dec 2025 11:40:10 -0700 Subject: [PATCH 16/36] commit run.py --- dev/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/run.py b/dev/run.py index 58f954b9..2daaddbd 100644 --- a/dev/run.py +++ b/dev/run.py @@ -75,10 +75,10 @@ def run(query: str): Do this quickly; don't go overboard. **Then**, write a plan (with at most 4 steps) to perform simple linear -regression on this data in python. I care only about the coefficients. Do not -provide other information or plots. The plan MUST NOT include code; though it +regression on this data in python. The plan MUST NOT include code; though it may include instruction to write code. The analysis should be **very minimal** -and AS CONCISE AS POSSIBLE. +and AS CONCISE AS POSSIBLE. I care only about the coefficients (including an +intercept). Do not provide other information or plots. **Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to `analysis.py`. DO NOT write anything to `data/`. Do not write any other From 6c6a54e42579c1a4d435728150a072c3614ae418 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Fri, 12 Dec 2025 12:10:14 -0700 Subject: [PATCH 17/36] better print --- dev/run.py | 36 +++++++--------------- src/ursa/experimental/agents/multiagent.py | 9 +++--- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/dev/run.py b/dev/run.py index 2daaddbd..18b66703 100644 --- a/dev/run.py +++ b/dev/run.py @@ -1,27 +1,28 @@ +# NOTE: This will be helpful for prompting. 
+# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide + import os from pathlib import Path import httpx from langchain.chat_models import init_chat_model from langchain.messages import HumanMessage -from langchain_openai import ChatOpenAI from langgraph.checkpoint.memory import InMemorySaver -from pydantic import SecretStr from ursa.experimental.agents.multiagent import Ursa aiportal = False if aiportal: - llm = ChatOpenAI( + llm = init_chat_model( model=os.environ["CLAUDE"], - # model="gpt-oss-120b", base_url=os.environ["AIPORTAL_API_URL"], - api_key=SecretStr(os.environ["AIPORTAL_API_KEY"]), + api_key=os.environ["AIPORTAL_API_KEY"], + model_provider="openai", + model_kwargs={"extra_headers": {"disable_fallbacks": "true"}}, http_client=httpx.Client(verify=False), ) else: - # llm = init_chat_model("ollama:ministral-3:14b") llm = init_chat_model("openai:gpt-5.2") @@ -51,22 +52,6 @@ def run(query: str): return result -# run( -# "Write and execute a very minimal python script to compute Pi using Monte Carlo." -# ) - -# run("What did you just do?") - -# print(results) - - -# run( -# "Write a plan to write a very minimal python script to compute Pi using Monte Carlo." -# "After planning, please execute the plan step by step. Save any code to disk." -# ) - -# run("Can you now execute the plan?") - # TODO: Need to make `uv run` a SAFE command. query = """ I have a file `data/data.csv`. @@ -85,9 +70,10 @@ def run(query: str): files. I want a single file with the entire analysis. **Finally**, edit `analysis.py` to make it AS CONCISE AS POSSIBLE. Don't -include code for assert, plots, etc. I want ONLY a very minimal script that -reads the data and then prints the linear model's coefficients. Remember, I -want A SINGLE FILE with the entire analysis (in `analysis.py`). +include code for assert, raising errors, exception handling, plots, etc. I want +ONLY a very minimal script that reads the data and then prints the linear +model's coefficients. 
Remember, I want A SINGLE FILE with the entire analysis +(in `analysis.py`). """ run(query) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 20196053..7e25a038 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -67,9 +67,9 @@ def execute_plan(plan: str): plan_steps = task_and_plan_steps[1:] for step in plan_steps: step_prompt = ( - "You are contributing a solution to the following overall plan. " - "The overall plan, last step's summary, and next step are as follows." - "With this information, please carry out the next step. " + "You are contributing a solution of an overall plan. " + "The overall plan, last step's summary, and next step are provided below. " + "With the provided information, please carry out the next step. " "IF you write any code, be sure to execute the code to make " "sure it properly runs." ) @@ -118,7 +118,8 @@ def call_agent(query: str): # return result["messages"][-1].content plan_steps = [{"task": query}] + result["plan_steps"] plan = f"\n{json.dumps(plan_steps)}\n" - print(json.dumps(plan_steps, indent=4)) + # print(json.dumps(plan_steps, indent=4)) + print(yaml.dump(plan_steps)) return plan return call_agent From 3cd1993b6d08301898167a876a78362a3cf782ed Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 15 Dec 2025 14:43:59 -0700 Subject: [PATCH 18/36] add multiagent test --- tests/agents/test_multiagent/.gitignore | 1 + .../agents/test_multiagent/test_multiagent.py | 112 ++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 tests/agents/test_multiagent/.gitignore create mode 100644 tests/agents/test_multiagent/test_multiagent.py diff --git a/tests/agents/test_multiagent/.gitignore b/tests/agents/test_multiagent/.gitignore new file mode 100644 index 00000000..e9ed58f7 --- /dev/null +++ b/tests/agents/test_multiagent/.gitignore @@ -0,0 +1 @@ +workspace/ diff --git 
a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py new file mode 100644 index 00000000..c8f3dd55 --- /dev/null +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -0,0 +1,112 @@ +# NOTE: This will be helpful for prompting. +# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide + + +import os +from pathlib import Path + +import httpx +from langchain.chat_models import init_chat_model +from langchain.messages import HumanMessage +from langgraph.checkpoint.memory import InMemorySaver + +from ursa.experimental.agents.multiagent import Ursa + +aiportal = False + +if aiportal: + llm = init_chat_model( + model=os.environ["CLAUDE"], + base_url=os.environ["AIPORTAL_API_URL"], + api_key=os.environ["AIPORTAL_API_KEY"], + model_provider="openai", + model_kwargs={"extra_headers": {"disable_fallbacks": "true"}}, + http_client=httpx.Client(verify=False), + ) +else: + llm = init_chat_model("openai:gpt-5.2") + + +def generate_data(data_path: Path): + import numpy as np + import pandas as pd + + rng = np.random.default_rng(0) + x = rng.uniform(0, 1, 100) + y = rng.normal(2 * x + 1, 0.1) + pd.DataFrame(dict(x=x, y=y)).to_csv(data_path, index=False) + + +workspace = Path(__file__).parent / "workspace" +data_dir = workspace / "data" +data_csv = data_dir / "data.csv" +if not data_csv.exists(): + data_dir.mkdir(exist_ok=True, parents=True) + generate_data(data_dir / "data.csv") + +agent = Ursa( + llm, + max_reflection_steps=0, + workspace=workspace, + checkpointer=InMemorySaver(), +).create() + +results = [] + + +def run(query: str): + print(f"Task:\n{query}") + results.append( + result := agent.invoke( + {"messages": [HumanMessage(query)]}, + { + "configurable": { + "thread_id": "ursa", + }, + "recursion_limit": 50, + }, + ) + ) + return result + + +# TODO: Need to make `uv run` a SAFE command. +query_1 = """ +I have a file `data/data.csv`. + +**First**, read the first few lines of the file to understand the format. 
+Do this quickly; don't go overboard. + +**Then**, write a plan (with at most 4 steps) to perform simple linear +regression on this data in python. The plan MUST NOT include code; though it +may include instruction to write code. The analysis should be **very minimal** +and AS CONCISE AS POSSIBLE. I care only about the coefficients (including an +intercept). Do not provide other information or plots. + +**Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to +`analysis.py`. DO NOT write anything to `data/`. Do not write any other +files. I want a single file with the entire analysis. + +**Finally**, edit `analysis.py` to make it AS CONCISE AS POSSIBLE. Don't +include code for assert, raising errors, exception handling, plots, etc. I want +ONLY a very minimal script that reads the data and then prints the linear +model's coefficients. Remember, I want A SINGLE FILE with the entire analysis +(in `analysis.py`). +""" + +query_2 = """ +I have a file `data/data.csv`. + +Please write a very minimal python script to perform linear regression on this +data. The analysis should be as concise as possible. I care only about the +coefficients (including an intercept). Do not provide other information or +plots. Write the analysis to `analysis.py`. Run the code to ensure it works.
+""" + + +def test_multiagent(): + run(query_1) + + for result in results: + for msg in result["messages"]: + msg.pretty_print() From 39f5d3802c4a00029a480c2e0019795e6bae0f7b Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 15 Dec 2025 14:44:25 -0700 Subject: [PATCH 19/36] remove dev --- dev/.gitignore | 1 - dev/justfile | 12 -------- dev/run.py | 82 -------------------------------------------------- 3 files changed, 95 deletions(-) delete mode 100644 dev/.gitignore delete mode 100644 dev/justfile delete mode 100644 dev/run.py diff --git a/dev/.gitignore b/dev/.gitignore deleted file mode 100644 index 76fa7ff5..00000000 --- a/dev/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dev-workspace diff --git a/dev/justfile b/dev/justfile deleted file mode 100644 index b1442c8f..00000000 --- a/dev/justfile +++ /dev/null @@ -1,12 +0,0 @@ -help: - just -l -u - -clean: - rm -rf dev-workspace/__pycache__ dev-workspace/results - find dev-workspace -mindepth 1 -not -name data.csv -exec rm -f {} + - -run: - uv run run.py - -test: - cd dev-workspace && uv run analysis.py diff --git a/dev/run.py b/dev/run.py deleted file mode 100644 index 18b66703..00000000 --- a/dev/run.py +++ /dev/null @@ -1,82 +0,0 @@ -# NOTE: This will be helpful for prompting. 
-# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide - -import os -from pathlib import Path - -import httpx -from langchain.chat_models import init_chat_model -from langchain.messages import HumanMessage -from langgraph.checkpoint.memory import InMemorySaver - -from ursa.experimental.agents.multiagent import Ursa - -aiportal = False - -if aiportal: - llm = init_chat_model( - model=os.environ["CLAUDE"], - base_url=os.environ["AIPORTAL_API_URL"], - api_key=os.environ["AIPORTAL_API_KEY"], - model_provider="openai", - model_kwargs={"extra_headers": {"disable_fallbacks": "true"}}, - http_client=httpx.Client(verify=False), - ) -else: - llm = init_chat_model("openai:gpt-5.2") - - -agent = Ursa( - llm, - max_reflection_steps=0, - workspace=Path("dev-workspace"), - checkpointer=InMemorySaver(), -).create() - -results = [] - - -def run(query: str): - print(f"Task:\n{query}") - results.append( - result := agent.invoke( - {"messages": [HumanMessage(query)]}, - { - "configurable": { - "thread_id": "ursa", - }, - "recursion_limit": 50, - }, - ) - ) - return result - - -# TODO: Need to make `uv run` a SAFE command. -query = """ -I have a file `data/data.csv`. - -**First**, read the first few lines of the file to understand the format. -Do this quickly; don't go overboard. - -**Then**, write a plan (with at most 4 steps) to perform simple linear -regression on this data in python. The plan MUST NOT include code; though it -may include instruction to write code. The analysis should be **very minimal** -and AS CONCISE AS POSSIBLE. I care only about the coefficients (including an -intercept). Do not provide other information or plots. - -**Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to -`analysis.py`. DO NOT write anything to `data/`. Do not write any other -files. I want a single file with the entire analysis. - -**Finally**, edit `analysis.py` to make it AS CONCISE AS POSSIBLE. 
Don't -include code for assert, raising errors, exception handling, plots, etc. I want -ONLY a very minimal script that reads the data and then prints the linear -model's coefficients. Remember, I want A SINGLE FILE with the entire analysis -(in `analysis.py`). -""" -run(query) - -for result in results: - for msg in result["messages"]: - msg.pretty_print() From 4c8b8aa7f755f448ff4fc1b91a27f9c4222cf313 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 15 Dec 2025 14:44:56 -0700 Subject: [PATCH 20/36] remove deep agent --- src/ursa/experimental/agents/deep.py | 42 ---------------------------- 1 file changed, 42 deletions(-) delete mode 100644 src/ursa/experimental/agents/deep.py diff --git a/src/ursa/experimental/agents/deep.py b/src/ursa/experimental/agents/deep.py deleted file mode 100644 index 372799cd..00000000 --- a/src/ursa/experimental/agents/deep.py +++ /dev/null @@ -1,42 +0,0 @@ -from deepagents import CompiledSubAgent, create_deep_agent -from langchain.chat_models import init_chat_model -from langchain.messages import HumanMessage - -from ursa.agents import ExecutionAgent - -llm = init_chat_model("openai:gpt-5-nano") -exe_graph = ExecutionAgent(llm=llm)._action - -# # Create a custom agent graph -# custom_graph = create_agent( -# model=exe_graph, -# # tools=specialized_tools, -# system_prompt="You are a specialized agent for data analysis...", -# ) - -# Use it as a custom subagent -exe_subagent = CompiledSubAgent( - name="executor", - description="Specialized agent for writing/executing code", - runnable=exe_graph, -) - -subagents = [exe_subagent] - -agent = create_deep_agent( - model=llm, - # tools=[internet_search], - system_prompt="You are a data scientist. 
When asked to write code, use the executor agent.", - subagents=[exe_subagent], -) - - -results = [] - - -def run(query: str): - results.append(result := agent.invoke({"messages": [HumanMessage(query)]})) - return result - - -run("Write a very minimal python script to compute Pi using Monte Carlo.") From 355b180b00b093236b2a16d8eb6c72e8b136933f Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 15 Dec 2025 14:51:32 -0700 Subject: [PATCH 21/36] yes --- src/ursa/experimental/agents/multiagent.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 7e25a038..25a34326 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -39,12 +39,11 @@ def tag(tag_name: str, content: str): + """Wrap content in XML tag""" return f"\n<{tag_name}>\n{content}\n\n\n" -# NOTE: Is the solution to have a tool that breaks up the string plan, and then -# execute each section of the plan? -# TODO: Try doing this instead: +# NOTE: Resources # https://docs.langchain.com/oss/python/langchain/multi-agent#where-to-customize def make_execute_plan_tool(llm: BaseChatModel, workspace: Path): execution_agent = ExecutionAgent(llm)._action From 52a4188bfb7ee39da93e41c793cd935cf3be677d Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Thu, 18 Dec 2025 14:44:10 -0700 Subject: [PATCH 22/36] add comments --- tests/agents/test_multiagent/test_multiagent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index c8f3dd55..bf2b516f 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -37,6 +37,7 @@ def generate_data(data_path: Path): pd.DataFrame(dict(x=x, y=y)).to_csv(data_path, index=False) +# Generate data if not already present. 
workspace = Path(__file__).parent / "workspace" data_dir = workspace / "data" data_csv = data_dir / "data.csv" @@ -44,6 +45,7 @@ def generate_data(data_path: Path): data_dir.mkdir(exist_ok=True, parents=True) generate_data(data_dir / "data.csv") +# Initialize agent. agent = Ursa( llm, max_reflection_steps=0, @@ -51,6 +53,7 @@ def generate_data(data_path: Path): checkpointer=InMemorySaver(), ).create() +# Store results (AI output) in this list. results = [] @@ -94,6 +97,7 @@ def run(query: str): (in `analysis.py`). """ +# An alternate query to test. query_2 = """ I have a file `data/data.csv`. From 187da3bb4844662b1b4f3234cd003776ce12b027 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Thu, 18 Dec 2025 17:22:05 -0700 Subject: [PATCH 23/36] update model --- .../agents/test_multiagent/test_multiagent.py | 75 +++++++++---------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index bf2b516f..c86348ba 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -12,9 +12,9 @@ from ursa.experimental.agents.multiagent import Ursa -aiportal = False +use_aiportal = False -if aiportal: +if use_aiportal: llm = init_chat_model( model=os.environ["CLAUDE"], base_url=os.environ["AIPORTAL_API_URL"], @@ -24,6 +24,7 @@ http_client=httpx.Client(verify=False), ) else: + # Use openai llm = init_chat_model("openai:gpt-5.2") @@ -37,42 +38,6 @@ def generate_data(data_path: Path): pd.DataFrame(dict(x=x, y=y)).to_csv(data_path, index=False) -# Generate data if not already present. -workspace = Path(__file__).parent / "workspace" -data_dir = workspace / "data" -data_csv = data_dir / "data.csv" -if not data_csv.exists(): - data_dir.mkdir(exist_ok=True, parents=True) - generate_data(data_dir / "data.csv") - -# Initialize agent. 
-agent = Ursa( - llm, - max_reflection_steps=0, - workspace=workspace, - checkpointer=InMemorySaver(), -).create() - -# Store results (AI output) in this list. -results = [] - - -def run(query: str): - print(f"Task:\n{query}") - results.append( - result := agent.invoke( - {"messages": [HumanMessage(query)]}, - { - "configurable": { - "thread_id": "ursa", - }, - "recursion_limit": 50, - }, - ) - ) - return result - - # TODO: Need to make `uv run` a SAFE command. query_1 = """ I have a file `data/data.csv`. @@ -109,6 +74,40 @@ def run(query: str): def test_multiagent(): + # Generate data if not already present. + workspace = Path(__file__).parent / "workspace" + data_dir = workspace / "data" + data_csv = data_dir / "data.csv" + if not data_csv.exists(): + data_dir.mkdir(exist_ok=True, parents=True) + generate_data(data_dir / "data.csv") + + # Initialize agent. + agent = Ursa( + llm, + max_reflection_steps=0, + workspace=workspace, + checkpointer=InMemorySaver(), + ).create() + + # Store results (AI output) in this list. 
+ results = [] + + def run(query: str): + print(f"Task:\n{query}") + results.append( + result := agent.invoke( + {"messages": [HumanMessage(query)]}, + { + "configurable": { + "thread_id": "ursa", + }, + "recursion_limit": 50, + }, + ) + ) + return result + run(query_1) for result in results: From a8f4594f12614adc0fc944a8def06ac8034201e3 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 22 Dec 2025 17:27:40 -0700 Subject: [PATCH 24/36] dynamic llm in multiagent test --- tests/agents/test_multiagent/test_multiagent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index c86348ba..62bf08d4 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -25,7 +25,7 @@ ) else: # Use openai - llm = init_chat_model("openai:gpt-5.2") + llm = init_chat_model(os.getenv("URSA_TEST_LLM", "openai:gpt-5.2")) def generate_data(data_path: Path): From d37333db898ff0112f6fafbf972669d9e5769e23 Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 24 Dec 2025 21:19:49 -0700 Subject: [PATCH 25/36] Update test_multiagent.py --- tests/agents/test_multiagent/test_multiagent.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index 62bf08d4..3bb75a70 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -12,20 +12,9 @@ from ursa.experimental.agents.multiagent import Ursa -use_aiportal = False - -if use_aiportal: - llm = init_chat_model( - model=os.environ["CLAUDE"], - base_url=os.environ["AIPORTAL_API_URL"], - api_key=os.environ["AIPORTAL_API_KEY"], - model_provider="openai", - model_kwargs={"extra_headers": {"disable_fallbacks": "true"}}, - http_client=httpx.Client(verify=False), - ) -else: - # Use openai - llm = 
init_chat_model(os.getenv("URSA_TEST_LLM", "openai:gpt-5.2")) + +# Use openai for the test on github +llm = init_chat_model(os.getenv("URSA_TEST_LLM", "openai:gpt-5.2")) def generate_data(data_path: Path): From ad8b21f6a915f693a3c94d5a694d24057ebc524f Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 24 Dec 2025 21:21:31 -0700 Subject: [PATCH 26/36] Update test_multiagent.py --- tests/agents/test_multiagent/test_multiagent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index 3bb75a70..7b35dcbd 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -5,7 +5,6 @@ import os from pathlib import Path -import httpx from langchain.chat_models import init_chat_model from langchain.messages import HumanMessage from langgraph.checkpoint.memory import InMemorySaver From ffc2ebb96ac240fd13ddf8f5c805b6186cec5efe Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 24 Dec 2025 21:23:28 -0700 Subject: [PATCH 27/36] Small formatting update. --- tests/agents/test_multiagent/test_multiagent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index 7b35dcbd..f4f7efa3 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -1,7 +1,6 @@ # NOTE: This will be helpful for prompting. # https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide - import os from pathlib import Path From f30b7e5811b5a54349349ec8e6ed586c5341b418 Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 24 Dec 2025 21:25:53 -0700 Subject: [PATCH 28/36] Small formatting update. 
--- tests/agents/test_multiagent/test_multiagent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index f4f7efa3..d9ed9c43 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -1,5 +1,5 @@ # NOTE: This will be helpful for prompting. -# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide +# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide import os from pathlib import Path From 49dc312ea378a8cdd9485c96ca4b2c054c560247 Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Thu, 25 Dec 2025 12:18:12 -0700 Subject: [PATCH 29/36] Formatting --- tests/agents/test_multiagent/test_multiagent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/agents/test_multiagent/test_multiagent.py b/tests/agents/test_multiagent/test_multiagent.py index d9ed9c43..acbfeb3f 100644 --- a/tests/agents/test_multiagent/test_multiagent.py +++ b/tests/agents/test_multiagent/test_multiagent.py @@ -10,7 +10,6 @@ from ursa.experimental.agents.multiagent import Ursa - # Use openai for the test on github llm = init_chat_model(os.getenv("URSA_TEST_LLM", "openai:gpt-5.2")) From d00f2ba09147795d28c90b037f84e8bdc91410d2 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 5 Jan 2026 16:42:28 -0700 Subject: [PATCH 30/36] default extra_tools to None --- src/ursa/experimental/agents/multiagent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 25a34326..d89e7907 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -145,7 +145,7 @@ class Ursa: def __init__( self, llm: BaseChatModel, - extra_tools: list = [], + extra_tools: Optional[list] = None, workspace: Path = Path("ursa-workspace"), checkpointer: Optional[BaseCheckpointSaver] 
= None, thread_id: str = "ursa", @@ -153,7 +153,7 @@ def __init__( system_prompt: str = system_prompt, ): self.llm = llm - self.extra_tools = extra_tools + self.extra_tools = extra_tools or [] self.workspace = workspace self.checkpointer = checkpointer self.thread_id = thread_id From d0b4f7e7864c34706326155a121d6f2d754f6568 Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 5 Jan 2026 16:46:19 -0700 Subject: [PATCH 31/36] change default workspace --- .gitignore | 1 - src/ursa/experimental/agents/multiagent.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c43b5c40..a7389580 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,3 @@ arxiv_papers ursa_workspace/ .vscode/settings.json scratch/ -ursa-workspace/ diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index d89e7907..e2f8eada 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -146,7 +146,7 @@ def __init__( self, llm: BaseChatModel, extra_tools: Optional[list] = None, - workspace: Path = Path("ursa-workspace"), + workspace: Path = Path("ursa_workspace"), checkpointer: Optional[BaseCheckpointSaver] = None, thread_id: str = "ursa", max_reflection_steps: int = 1, From 8c15502a8b719b5c51003fa68f9ceafdfe7693ab Mon Sep 17 00:00:00 2001 From: Arthur Lui Date: Mon, 5 Jan 2026 16:46:49 -0700 Subject: [PATCH 32/36] add space --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a7389580..3093dabc 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ arxiv_papers ursa_workspace/ .vscode/settings.json scratch/ + From 8fbcc44fcd8e5fc54a7dc691031bd6907a06d708 Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Fri, 9 Jan 2026 10:22:35 -0700 Subject: [PATCH 33/36] Fix to address failed test Alex's recent refactor got rid of the _action method and made invoke a method of each agent directly. 
Removed _action and this should pass the other CI tests. Co-authored-by: lui-arthur --- src/ursa/experimental/agents/multiagent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index e2f8eada..1c24368e 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -46,7 +46,7 @@ def tag(tag_name: str, content: str): # NOTE: Resources # https://docs.langchain.com/oss/python/langchain/multi-agent#where-to-customize def make_execute_plan_tool(llm: BaseChatModel, workspace: Path): - execution_agent = ExecutionAgent(llm)._action + execution_agent = ExecutionAgent(llm) @tool( "execute_plan_tool", @@ -125,7 +125,7 @@ def call_agent(query: str): def make_execution_tool(llm: BaseChatModel, workspace: Path): - execution_agent = ExecutionAgent(llm)._action + execution_agent = ExecutionAgent(llm) @tool( "execution_agent", From c8de7b24705feb11454b98338b84a682f41a5f01 Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Fri, 9 Jan 2026 10:51:44 -0700 Subject: [PATCH 34/36] Missed one _action There was one I missed on the planning agent. 
Co-authored-by: lui-arthur --- src/ursa/experimental/agents/multiagent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 1c24368e..7f41a3c6 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -103,7 +103,7 @@ def execute_plan(plan: str): def make_planning_tool(llm: BaseChatModel, max_reflection_steps: int): planning_agent = PlanningAgent( llm, max_reflection_steps=max_reflection_steps - )._action + ) @tool( "planning_agent", From 52b6854bdfe966f11af280b354f16becfb79aa9f Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 21 Jan 2026 00:07:42 -0700 Subject: [PATCH 35/36] Small update toward bringing up to date with other PRs. I will do a little more updating soon. --- src/ursa/experimental/agents/multiagent.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 7f41a3c6..014905f1 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -114,10 +114,18 @@ def call_agent(query: str): "messages": [HumanMessage(query)], "reflection_steps": max_reflection_steps, }) - # return result["messages"][-1].content - plan_steps = [{"task": query}] + result["plan_steps"] + plan_steps = [{"task": query}] + [ + { + "name": plan_step.name, + "description": plan_step.description, + "expected_outputs": plan_step.expected_outputs, + "success_criteria": plan_step.success_criteria, + "requires_code": plan_step.requires_code, + } + for plan_step in result["plan"].steps + ] + plan = f"\n{json.dumps(plan_steps)}\n" - # print(json.dumps(plan_steps, indent=4)) print(yaml.dump(plan_steps)) return plan From d1f393ee8720043eb872e0d594013dd6b8cf4f1e Mon Sep 17 00:00:00 2001 From: Mike Grosskopf Date: Wed, 21 Jan 2026 10:54:35 -0700 Subject: [PATCH 36/36] 
Small updates - Passing checkpointer to the subagents so that they can keep a history. - Fixed a JSON Decode Error that could happen on the LLM output. - Removed some potential bad character passing to json.loads --- src/ursa/experimental/agents/multiagent.py | 84 ++++++++++++++++------ 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/src/ursa/experimental/agents/multiagent.py b/src/ursa/experimental/agents/multiagent.py index 014905f1..dd0b9da0 100644 --- a/src/ursa/experimental/agents/multiagent.py +++ b/src/ursa/experimental/agents/multiagent.py @@ -1,4 +1,5 @@ import json +import re from pathlib import Path from typing import Optional @@ -45,8 +46,18 @@ def tag(tag_name: str, content: str): # NOTE: Resources # https://docs.langchain.com/oss/python/langchain/multi-agent#where-to-customize -def make_execute_plan_tool(llm: BaseChatModel, workspace: Path): - execution_agent = ExecutionAgent(llm) +def make_execute_plan_tool( + llm: BaseChatModel, + workspace: Path, + thread_id: str, + checkpointer: Checkpointer, +): + execution_agent = ExecutionAgent( + llm, + workspace=workspace, + checkpointer=checkpointer, + thread_id=thread_id + "_plan_executor", + ) @tool( "execute_plan_tool", @@ -59,9 +70,17 @@ def execute_plan(plan: str): if plan.startswith("") and plan.endswith(""): summaries = [] - task_and_plan_steps = json.loads( + plan_string = ( plan.replace("", "").replace("", "").strip() ) + # Slight format cleaning. 
+ # Remove control characters except \t, \n, \r + # Some LLMs respond with invalid control characters + plan_string = re.sub( + r"[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f]", "", plan_string + ) + task_and_plan_steps = json.loads(plan_string) + task = task_and_plan_steps[0]["task"] plan_steps = task_and_plan_steps[1:] for step in plan_steps: @@ -82,11 +101,8 @@ def execute_plan(plan: str): step_prompt += tag("NEXT_STEP", yaml.dump(step).strip()) print(step_prompt) - result = execution_agent.invoke({ - "messages": [HumanMessage(step_prompt)], - "workspace": str(workspace), - }) - last_step_summary = result["messages"][-1].content + result = execution_agent.invoke(step_prompt) + last_step_summary = result["messages"][-1].text summaries.append(last_step_summary) return "Grand summary of plan execution:\n\n" + "\n\n".join( summaries @@ -100,9 +116,17 @@ def execute_plan(plan: str): return execute_plan -def make_planning_tool(llm: BaseChatModel, max_reflection_steps: int): +def make_planning_tool( + llm: BaseChatModel, + max_reflection_steps: int, + thread_id: str, + checkpointer: Checkpointer, +): planning_agent = PlanningAgent( - llm, max_reflection_steps=max_reflection_steps + llm, + checkpointer=checkpointer, + thread_id=thread_id + "_planner", + max_reflection_steps=max_reflection_steps, ) @tool( @@ -132,19 +156,26 @@ def call_agent(query: str): return call_agent -def make_execution_tool(llm: BaseChatModel, workspace: Path): - execution_agent = ExecutionAgent(llm) +def make_execution_tool( + llm: BaseChatModel, + workspace: Path, + thread_id: str, + checkpointer: Checkpointer, +): + execution_agent = ExecutionAgent( + llm, + workspace=workspace, + checkpointer=checkpointer, + thread_id=thread_id + "_executor", + ) @tool( "execution_agent", description="Read and edit scripts/code, and execute arbitrary commands on command line.", ) def call_agent(query: str): - result = execution_agent.invoke({ - "messages": [HumanMessage(query)], - "workspace": str(workspace), - }) - return 
result["messages"][-1].content + result = execution_agent.invoke(query) + return result["messages"][-1].text return call_agent @@ -177,11 +208,24 @@ def create(self, **kwargs): kwargs: for `create_agent` """ self.subagents = [ - make_execution_tool(llm=self.llm, workspace=self.workspace), + make_execution_tool( + llm=self.llm, + workspace=self.workspace, + thread_id=self.thread_id, + checkpointer=self.checkpointer, + ), make_planning_tool( - llm=self.llm, max_reflection_steps=self.max_reflection_steps + llm=self.llm, + max_reflection_steps=self.max_reflection_steps, + thread_id=self.thread_id, + checkpointer=self.checkpointer, + ), + make_execute_plan_tool( + llm=self.llm, + workspace=self.workspace, + thread_id=self.thread_id, + checkpointer=self.checkpointer, ), - make_execute_plan_tool(llm=self.llm, workspace=self.workspace), ] self.tools = self.subagents + self.extra_tools return create_agent(