Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
8f8155b
add multiagent
luiarthur Dec 4, 2025
f3856df
plan_execute_tool
luiarthur Dec 5, 2025
066cedc
yes
luiarthur Dec 6, 2025
9b34694
yes
luiarthur Dec 6, 2025
fb2d2e6
yes
luiarthur Dec 7, 2025
659eeea
improve demo
luiarthur Dec 8, 2025
4173c11
yes
luiarthur Dec 8, 2025
aa68c1a
yes
luiarthur Dec 8, 2025
983acba
yes
luiarthur Dec 8, 2025
fe394d5
yes
luiarthur Dec 8, 2025
598309f
add todo for input/output control between agents
luiarthur Dec 9, 2025
79fc6cc
yes
luiarthur Dec 9, 2025
5beaa26
yes
luiarthur Dec 9, 2025
a118695
yes
luiarthur Dec 12, 2025
4518579
format
luiarthur Dec 12, 2025
0500290
commit run.py
luiarthur Dec 12, 2025
6c6a54e
better print
luiarthur Dec 12, 2025
3cd1993
add multiagent test
luiarthur Dec 15, 2025
39f5d38
remove dev
luiarthur Dec 15, 2025
4c8b8aa
remove deep agent
luiarthur Dec 15, 2025
355b180
yes
luiarthur Dec 15, 2025
52a4188
add comments
luiarthur Dec 18, 2025
187da3b
update model
luiarthur Dec 19, 2025
a8f4594
dynamic llm in multiagent test
luiarthur Dec 23, 2025
d37333d
Update test_multiagent.py
mikegros Dec 25, 2025
ad8b21f
Update test_multiagent.py
mikegros Dec 25, 2025
ffc2ebb
Small formatting update.
mikegros Dec 25, 2025
f30b7e5
Small formatting update.
mikegros Dec 25, 2025
49dc312
Formatting
mikegros Dec 25, 2025
d00f2ba
default extra_tools to None
luiarthur Jan 5, 2026
d0b4f7e
change default workspace
luiarthur Jan 5, 2026
8c15502
add space
luiarthur Jan 5, 2026
8fbcc44
Fix to address failed test
mikegros Jan 9, 2026
c8de7b2
Missed one _action
mikegros Jan 9, 2026
ca1ddc0
Merge branch 'main' into alui/multiagent
mikegros Jan 21, 2026
52b6854
Small update toward bringing up to date with other PRs. I will do a l…
mikegros Jan 21, 2026
7a25cbc
Merge branch 'main' into alui/multiagent
mikegros Jan 21, 2026
57d1285
Merge branch 'alui/multiagent' of github.com:lanl/ursa into alui/mult…
mikegros Jan 21, 2026
d1f393e
Small updates
mikegros Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,7 @@ shell:
pygrep pattern:
conda run --live-stream -n base watch \
grep --exclude-dir=__pycache__ --exclude-dir=.venv -r '{{ pattern }}'

[no-cd]
python:
uv run ipython --no-autoindent
2 changes: 1 addition & 1 deletion src/ursa/agents/execution_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class ExecutionState(TypedDict):
"""TypedDict representing the execution agent's mutable run state used by nodes.

Fields:
- messages: list of messages (System/Human/AI/Tool) with add_messages metadata.
- messages: list of messages (System/Human/AI/Tool).
- current_progress: short status string describing agent progress.
- code_files: list of filenames created or edited in the workspace.
- workspace: path to the working directory where files and commands run.
Expand Down
237 changes: 237 additions & 0 deletions src/ursa/experimental/agents/multiagent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import json
import re
from pathlib import Path
from typing import Optional

import yaml
from langchain.agents import create_agent
from langchain.chat_models import BaseChatModel
from langchain.messages import HumanMessage
from langchain.tools import tool
from langgraph.checkpoint.base import BaseCheckpointSaver

from ursa.agents import ExecutionAgent, PlanningAgent
from ursa.util import Checkpointer

# Default system prompt for the top-level agent. It lists the three tools
# built in this module (planning_agent, execution_agent, execute_plan_tool)
# and when each one should be used. The usage notes are nested under the tool
# they describe so the model does not read them as separate tools.
system_prompt = """\
You are a data scientist with multiple tools.

These tools are available to you:

* planning_agent
    * Use this tool whenever you are asked to plan out tasks.
    * In each step of your plan, if code needs to be generated, please explicitly
      state in the step that code needs to be written and executed.

* execution_agent
    * Use this tool **whenever** you are asked to write/edit code or run
      arbitrary commands from the command line.

* execute_plan_tool
    * Use this tool if you are asked to execute a plan that starts with <PLAN>
      and ends with </PLAN>.
    * Do not use this tool if the <PLAN></PLAN> tags are not present in the
      instruction!

Note that this project is managed by `uv`. So, if you need to execute python
code, you MUST run `uv run path/to/file.py`. DO NOT run `python
/path/to/file.py` or `python3 /path/to/file.py`.
"""


def tag(tag_name: str, content: str):
    """Return `content` enclosed in an XML-style `<tag_name>` element.

    The element is preceded by one newline and followed by two, and the
    opening tag, content, and closing tag each sit on their own line.
    """
    opening = f"<{tag_name}>"
    closing = f"</{tag_name}>"
    body = "\n".join((opening, f"{content}", closing))
    return "\n" + body + "\n\n"


# NOTE: Resources
# https://docs.langchain.com/oss/python/langchain/multi-agent#where-to-customize
def make_execute_plan_tool(
    llm: BaseChatModel,
    workspace: Path,
    thread_id: str,
    checkpointer: Checkpointer,
):
    """Build the `execute_plan_tool` tool, which runs a plan one step at a time.

    A dedicated ExecutionAgent is created once (thread id
    `thread_id + "_plan_executor"`) and reused for every step of every plan
    passed to the returned tool.

    Args:
        llm: Chat model handed to the ExecutionAgent.
        workspace: Workspace path handed to the ExecutionAgent.
        thread_id: Base thread id; a `_plan_executor` suffix is appended.
        checkpointer: Checkpointer handed to the ExecutionAgent.

    Returns:
        The decorated tool. Given a `<PLAN>...</PLAN>` string it returns a
        summary made of the last message text of each executed step; given
        anything it cannot parse it returns an explanatory message instead of
        raising, so the calling model can correct its input.
    """
    execution_agent = ExecutionAgent(
        llm,
        workspace=workspace,
        checkpointer=checkpointer,
        thread_id=thread_id + "_plan_executor",
    )

    # Identical for every step, so it is built once rather than per iteration.
    step_preamble = (
        "You are contributing a solution of an overall plan. "
        "The overall plan, last step's summary, and next step are provided below. "
        "With the provided information, please carry out the next step. "
        "IF you write any code, be sure to execute the code to make "
        "sure it properly runs."
    )

    @tool(
        "execute_plan_tool",
        description="Execute a plan from the planning agent tool.",
    )
    def execute_plan(plan: str):
        """Execute plan item by item."""

        print("EXECUTING PLAN")
        try:
            task, plan_steps = _parse_plan(plan)
        except ValueError as err:
            # Report the problem to the caller rather than crashing the tool.
            return (
                "Could not use `execute_plan_tool` to execute the plan "
                f"as {err}"
            )

        summaries = []
        for step in plan_steps:
            step_prompt = step_preamble + tag("OVERALL_PLAN", task)
            if summaries:
                # Only the most recent summary is forwarded to the next step.
                step_prompt += tag("SUMMARY_OF_LAST_STEP", summaries[-1])
            step_prompt += tag("NEXT_STEP", yaml.dump(step).strip())
            print(step_prompt)

            result = execution_agent.invoke(step_prompt)
            summaries.append(result["messages"][-1].text)

        return "Grand summary of plan execution:\n\n" + "\n\n".join(summaries)

    return execute_plan


def _parse_plan(plan: str):
    """Split a `<PLAN>...</PLAN>` payload into `(task, steps)`.

    The payload between the tags must be a JSON list whose first item is a
    mapping with a "task" key; the remaining items are the plan steps.

    Raises:
        ValueError: If the tags are missing, the payload is not valid JSON,
            or the payload does not have the expected structure.
    """
    open_tag, close_tag = "<PLAN>", "</PLAN>"

    # Tolerate leading/trailing whitespace around the tags.
    plan = plan.strip()
    if not (plan.startswith(open_tag) and plan.endswith(close_tag)):
        raise ValueError("plan does not start/end with <PLAN>/</PLAN>.")

    # Remove only the outer tags so tag-like text inside a step is preserved.
    payload = plan[len(open_tag) : -len(close_tag)].strip()

    # Slight format cleaning.
    # Remove control characters except \t, \n, \r
    # Some LLMs respond with invalid control characters
    payload = re.sub(r"[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f]", "", payload)

    try:
        items = json.loads(payload)
    except json.JSONDecodeError as err:
        raise ValueError(f"plan is not valid JSON ({err}).") from err

    has_task_header = (
        isinstance(items, list)
        and len(items) > 0
        and isinstance(items[0], dict)
        and "task" in items[0]
    )
    if not has_task_header:
        raise ValueError(
            'plan must be a JSON list whose first item is {"task": ...}.'
        )

    return items[0]["task"], items[1:]


def make_planning_tool(
    llm: BaseChatModel,
    max_reflection_steps: int,
    thread_id: str,
    checkpointer: Checkpointer,
):
    """Build the `planning_agent` tool backed by a PlanningAgent.

    The PlanningAgent is created once with thread id `thread_id + "_planner"`.
    The returned tool takes a free-text query and returns the resulting plan
    as a JSON list wrapped in `<PLAN>` / `</PLAN>` tags: the first item is
    `{"task": query}` and each following item describes one plan step.
    """
    planner = PlanningAgent(
        llm,
        checkpointer=checkpointer,
        thread_id=thread_id + "_planner",
        max_reflection_steps=max_reflection_steps,
    )

    # Attributes copied from each plan step, in serialization order.
    step_fields = (
        "name",
        "description",
        "expected_outputs",
        "success_criteria",
        "requires_code",
    )

    @tool(
        "planning_agent",
        description="Create plans for arbitrary tasks",
    )
    def call_agent(query: str):
        planner_input = {
            "messages": [HumanMessage(query)],
            "reflection_steps": max_reflection_steps,
        }
        outcome = planner.invoke(planner_input)

        # The original task leads the list so a plan executor can recover it.
        serialized = [{"task": query}]
        for item in outcome["plan"].steps:
            serialized.append({
                field: getattr(item, field) for field in step_fields
            })

        wrapped = f"<PLAN>\n{json.dumps(serialized)}\n</PLAN>"
        print(yaml.dump(serialized))
        return wrapped

    return call_agent


def make_execution_tool(
    llm: BaseChatModel,
    workspace: Path,
    thread_id: str,
    checkpointer: Checkpointer,
):
    """Build the `execution_agent` tool backed by an ExecutionAgent.

    The ExecutionAgent is created once with thread id
    `thread_id + "_executor"`. The returned tool passes its query straight to
    that agent and returns the text of the last message in the result.
    """
    executor_thread = thread_id + "_executor"
    executor = ExecutionAgent(
        llm,
        workspace=workspace,
        checkpointer=checkpointer,
        thread_id=executor_thread,
    )

    @tool(
        "execution_agent",
        description="Read and edit scripts/code, and execute arbitrary commands on command line.",
    )
    def call_agent(query: str):
        # Only the final message's text is surfaced to the caller.
        final_message = executor.invoke(query)["messages"][-1]
        return final_message.text

    return call_agent


class Ursa:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this inherit BaseAgent for usage metrics or anything else?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We talked about this, so I don't think we need to make any changes.

def __init__(
    self,
    llm: BaseChatModel,
    extra_tools: Optional[list] = None,
    workspace: Path = Path("ursa_workspace"),
    checkpointer: Optional[BaseCheckpointSaver] = None,
    thread_id: str = "ursa",
    max_reflection_steps: int = 1,
    system_prompt: str = system_prompt,
):
    """Store the configuration used later by `create`.

    Args:
        llm: Chat model shared by the top-level agent and its sub-agents.
        extra_tools: Additional tools appended after the built-in ones;
            `None` (or an empty list) means no extra tools.
        workspace: Workspace path passed to the execution sub-agents and,
            when no checkpointer is given, to `Checkpointer.from_workspace`.
        checkpointer: Checkpointer to use. When `None`, one is obtained
            from `Checkpointer.from_workspace(workspace)`.
        thread_id: Base thread id from which sub-agent thread ids are derived.
        max_reflection_steps: Passed to the planning sub-agent.
        system_prompt: System prompt for the top-level agent.
    """
    self.llm = llm
    self.extra_tools = extra_tools or []
    self.workspace = workspace
    self.thread_id = thread_id
    self.system_prompt = system_prompt
    self.max_reflection_steps = max_reflection_steps

    # Assigned exactly once. The check is `is None` rather than truthiness so
    # that a caller-supplied checkpointer is never silently replaced.
    if checkpointer is None:
        checkpointer = Checkpointer.from_workspace(workspace)
    self.checkpointer = checkpointer

def create(self, **kwargs):
    """Assemble the sub-agent tools and return the top-level agent.

    The built-in tools are stored on `self.subagents`; those plus
    `self.extra_tools` are stored on `self.tools` and handed to
    `create_agent` together with the system prompt and checkpointer.

    kwargs: forwarded unchanged to `create_agent`.
    """
    # Settings every sub-agent factory receives.
    shared = {
        "llm": self.llm,
        "thread_id": self.thread_id,
        "checkpointer": self.checkpointer,
    }

    execution_tool = make_execution_tool(workspace=self.workspace, **shared)
    planning_tool = make_planning_tool(
        max_reflection_steps=self.max_reflection_steps, **shared
    )
    plan_runner_tool = make_execute_plan_tool(
        workspace=self.workspace, **shared
    )

    self.subagents = [execution_tool, planning_tool, plan_runner_tool]
    self.tools = self.subagents + self.extra_tools

    agent = create_agent(
        self.llm,
        tools=self.tools,
        system_prompt=self.system_prompt,
        checkpointer=self.checkpointer,
        **kwargs,
    )
    return agent
1 change: 1 addition & 0 deletions tests/agents/test_multiagent/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
workspace/
101 changes: 101 additions & 0 deletions tests/agents/test_multiagent/test_multiagent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# NOTE: This will be helpful for prompting.
# https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide

import os
from pathlib import Path

from langchain.chat_models import init_chat_model
from langchain.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver

from ursa.experimental.agents.multiagent import Ursa

# Use openai for the test on github
# The model identifier is read from the URSA_TEST_LLM environment variable and
# falls back to "openai:gpt-5.2" when that variable is unset. The model is
# initialized at import time of this module.
llm = init_chat_model(os.getenv("URSA_TEST_LLM", "openai:gpt-5.2"))


def generate_data(data_path: Path):
    """Write a reproducible synthetic regression dataset to `data_path` as CSV.

    One hundred `x` values are drawn uniformly from [0, 1) using a generator
    seeded with 0; `y` is drawn from a normal distribution with mean
    `2 * x + 1` and standard deviation 0.1. The file has columns `x` and `y`
    and no index column.
    """
    import numpy as np
    import pandas as pd

    n_obs = 100
    slope, intercept, noise_sd = 2, 1, 0.1

    generator = np.random.default_rng(0)
    # Draw order matters for reproducibility: x first, then y.
    x = generator.uniform(0, 1, n_obs)
    y = generator.normal(slope * x + intercept, noise_sd)

    frame = pd.DataFrame({"x": x, "y": y})
    frame.to_csv(data_path, index=False)


# TODO: Need to make `uv run` a SAFE command.
# Primary prompt for the test. It asks the agent to, in order: inspect
# `data/data.csv`, write a plan of at most 4 steps for a simple linear
# regression, execute that plan with execute_plan_tool (writing all code to
# `analysis.py`), and finally trim `analysis.py` down to a minimal script.
query_1 = """
I have a file `data/data.csv`.

**First**, read the first few lines of the file to understand the format.
Do this quickly; don't go overboard.

**Then**, write a plan (with at most 4 steps) to perform simple linear
regression on this data in python. The plan MUST NOT include code; though it
may include instruction to write code. The analysis should be **very minimal**
and AS CONCISE AS POSSIBLE. I care only about the coefficients (including an
intercept). Do not provide other information or plots.

**Then**, EXECUTE THE PLAN using execute_plan_tool. Write all code to
`analysis.py`. DO NOT write anything to `data/`. Do not write any other
files. I want a single file with the entire analysis.

**Finally**, edit `analysis.py` to make it AS CONCISE AS POSSIBLE. Don't
include code for assert, raising errors, exception handling, plots, etc. I want
ONLY a very minimal script that reads the data and then prints the linear
model's coefficients. Remember, I want A SINGLE FILE with the entire analysis
(in `analysis.py`).
"""

# An alternate query to test.
# Shorter than the primary prompt: it asks directly for a minimal regression
# script in `analysis.py` without a separate planning step.
query_2 = """
I have a file `data/data.csv`.

Please write a very minimal python script to perform linear regression on this
data. The analysis should be as concise as possible. I care only about the
coefficients (including an intercept). Do not provide other information or
plots. Write the analysis to `analysis.py`. Run the code to ensure it works.
"""


def test_multiagent():
    """Smoke-test the Ursa multi-agent: run `query_1` and check it responds.

    The synthetic dataset is generated under `workspace/data/` next to this
    file if it is not already present. Every message of the run is printed,
    then the test asserts that the agent actually returned messages.
    """
    # Generate data if not already present.
    workspace = Path(__file__).parent / "workspace"
    data_csv = workspace / "data" / "data.csv"
    if not data_csv.exists():
        data_csv.parent.mkdir(exist_ok=True, parents=True)
        generate_data(data_csv)

    # Initialize agent.
    agent = Ursa(
        llm,
        max_reflection_steps=0,
        workspace=workspace,
        checkpointer=InMemorySaver(),
    ).create()

    # Store results (AI output) in this list.
    results = []

    def run(query: str):
        """Invoke the agent on `query`, record the result, and return it."""
        print(f"Task:\n{query}")
        result = agent.invoke(
            {"messages": [HumanMessage(query)]},
            {
                "configurable": {
                    "thread_id": "ursa",
                },
                "recursion_limit": 50,
            },
        )
        results.append(result)
        return result

    run(query_1)

    # Print first so the transcript is visible even when an assertion fails.
    for result in results:
        for msg in result["messages"]:
            msg.pretty_print()

    assert results, "agent was never invoked"
    assert all(result["messages"] for result in results), (
        "agent returned an empty message list"
    )