diff --git a/CLAUDE.md b/CLAUDE.md
index b5eb728..5bdae49 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -69,6 +69,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
| `backend/services/git_service.py` | Stateless git operations via subprocess + asyncio.to_thread |
| `backend/services/resource_monitor.py` | Health checks (Ollama, ComfyUI, Claude) |
| `backend/services/progress.py` | SSE broadcast, event persistence |
+| `backend/utils/xml_utils.py` | XML plan extraction and parsing (extract_xml_plan, parse_plan_xml) |
| `backend/tools/registry.py` | Injectable `ToolRegistry` class |
| `backend/tools/` | Tool implementations (RAG, Ollama, ComfyUI, file) |
| `frontend/` | React 19 + TypeScript + Vite UI (ErrorBoundary, 404 page) |
@@ -93,6 +94,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
- **Auth**: JWT Bearer tokens for REST, API keys (`orch_` prefix) for MCP/external executors, short-lived SSE tokens for EventSource. First registered user becomes admin.
- **Ownership**: projects have `owner_id`. Users see/modify only their own projects. Admins can access all.
- **Budget**: every API call recorded in `usage_log`, checked against limits before execution. Budget endpoints are admin-only.
+- **Plans**: XML format (source of truth in `plan_xml` column). Dual-column: `plan_xml` + `plan_json` for backward compat. Decomposer/routes prefer XML with JSON fallback. Planner has JSON fallback if Claude returns JSON despite XML prompt.
- **Models**: Ollama (free) for simple tasks, Haiku ($) for medium, Sonnet ($$) for complex
- **Tools**: registered in `ToolRegistry` class, injected via DI container
- **SSE**: short-lived token via `POST /api/events/{project_id}/token`, then stream via `GET /api/events/{project_id}?token=...`
@@ -108,7 +110,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
- **Traceability**: requirements numbered [R1], [R2], mapped to tasks; coverage endpoint shows gaps
- **External execution**: MCP server (`backend/mcp/server.py`) for Claude Code integration. Execution modes: auto (engine-only), hybrid (Ollama internal, Claude external), external (all external). Tasks claimed atomically via CAS, results submitted with cost tracking.
- **Git integration**: optional per-project (`repo_path` nullable). `GitService` wraps subprocess via `asyncio.to_thread()`. Config in `git.*` section. Phase 1 (foundation) complete; execution wiring (Phase 2+) pending.
-- **Tests**: Backend: pytest-asyncio (auto mode), 731 tests. Frontend: vitest + @testing-library/react, 137 tests. Load tests: 7 (excluded from CI via `slow` marker)
+- **Tests**: Backend: pytest-asyncio (auto mode), 797 tests. Frontend: vitest + @testing-library/react, 137 tests. Load tests: 7 (excluded from CI via `slow` marker)
## Git Workflow
diff --git a/backend/db/connection.py b/backend/db/connection.py
index a42d39b..b429753 100644
--- a/backend/db/connection.py
+++ b/backend/db/connection.py
@@ -59,6 +59,7 @@
completion_tokens INTEGER NOT NULL DEFAULT 0,
cost_usd REAL NOT NULL DEFAULT 0.0,
plan_json TEXT NOT NULL,
+ plan_xml TEXT,
status TEXT NOT NULL DEFAULT 'draft',
created_at REAL NOT NULL
);
diff --git a/backend/db/models_metadata.py b/backend/db/models_metadata.py
index 594e487..219973d 100644
--- a/backend/db/models_metadata.py
+++ b/backend/db/models_metadata.py
@@ -63,6 +63,7 @@
Column("completion_tokens", Integer, nullable=False, server_default="0"),
Column("cost_usd", Float, nullable=False, server_default="0.0"),
Column("plan_json", Text, nullable=False),
+ Column("plan_xml", Text, nullable=True),
Column("status", Text, nullable=False, server_default="draft"),
Column("created_at", Float, nullable=False),
)
diff --git a/backend/migrations/versions/015_add_plan_xml.py b/backend/migrations/versions/015_add_plan_xml.py
new file mode 100644
index 0000000..da03d08
--- /dev/null
+++ b/backend/migrations/versions/015_add_plan_xml.py
@@ -0,0 +1,32 @@
+# Orchestration Engine - Migration 015
+#
+# Add plan_xml column to plans table for XML plan storage.
+# Existing plans remain in plan_json; new plans write both columns.
+#
+# Depends on: 014_add_api_keys_and_claim_tracking
+# Used by: services/planner.py, services/decomposer.py
+
+"""Add plan_xml column to plans table.
+
+Revision ID: 015
+Revises: 014
+Create Date: 2026-03-06
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+revision = "015"
+down_revision = "014"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ with op.batch_alter_table("plans") as batch_op:
+ batch_op.add_column(sa.Column("plan_xml", sa.Text(), nullable=True))
+
+
+def downgrade():
+ with op.batch_alter_table("plans") as batch_op:
+ batch_op.drop_column("plan_xml")
diff --git a/backend/models/schemas.py b/backend/models/schemas.py
index 3a7b6c8..6e89db7 100644
--- a/backend/models/schemas.py
+++ b/backend/models/schemas.py
@@ -95,7 +95,8 @@ class PlanOut(BaseModel):
prompt_tokens: int
completion_tokens: int
cost_usd: float
- plan: dict # The structured plan JSON
+ plan: dict # The structured plan data (parsed from XML or JSON)
+ plan_xml: str | None = None # Raw XML plan (if available)
status: PlanStatus
created_at: float
diff --git a/backend/routes/projects.py b/backend/routes/projects.py
index 0eef113..575a8d3 100644
--- a/backend/routes/projects.py
+++ b/backend/routes/projects.py
@@ -35,6 +35,15 @@
# Helpers
# ---------------------------------------------------------------------------
+def _parse_plan_from_row(row) -> dict:
+ """Parse plan data from a DB row, preferring plan_xml over plan_json."""
+ plan_xml_raw = row["plan_xml"]
+ if plan_xml_raw:
+ from backend.utils.xml_utils import parse_plan_xml
+ return parse_plan_xml(plan_xml_raw)
+ return json.loads(row["plan_json"])
+
+
async def _row_to_project(
row, db: Database,
include_task_summary: bool = False,
@@ -298,7 +307,8 @@ async def list_plans(
prompt_tokens=r["prompt_tokens"],
completion_tokens=r["completion_tokens"],
cost_usd=r["cost_usd"],
- plan=json.loads(r["plan_json"]),
+ plan=_parse_plan_from_row(r),
+ plan_xml=r["plan_xml"],
status=r["status"],
created_at=r["created_at"],
)
@@ -438,9 +448,10 @@ async def clone_project(
new_plan_id = uuid.uuid4().hex[:12]
await db.execute_write(
"INSERT INTO plans (id, project_id, version, model_used, prompt_tokens, "
- "completion_tokens, cost_usd, plan_json, status, created_at) "
- "VALUES (?, ?, 1, ?, 0, 0, 0.0, ?, 'draft', ?)",
- (new_plan_id, new_project_id, plan_row["model_used"], plan_row["plan_json"], now),
+ "completion_tokens, cost_usd, plan_json, plan_xml, status, created_at) "
+ "VALUES (?, ?, 1, ?, 0, 0, 0.0, ?, ?, 'draft', ?)",
+ (new_plan_id, new_project_id, plan_row["model_used"],
+ plan_row["plan_json"], plan_row["plan_xml"], now),
)
# 3. Clone tasks (reset status, clear output/cost/retry)
@@ -512,8 +523,8 @@ async def export_project(
{
"id": p["id"], "version": p["version"], "model_used": p["model_used"],
"prompt_tokens": p["prompt_tokens"], "completion_tokens": p["completion_tokens"],
- "cost_usd": p["cost_usd"], "plan": json.loads(p["plan_json"]),
- "status": p["status"], "created_at": p["created_at"],
+ "cost_usd": p["cost_usd"], "plan": _parse_plan_from_row(p),
+ "plan_xml": p["plan_xml"], "status": p["status"], "created_at": p["created_at"],
}
for p in plan_rows
]
diff --git a/backend/services/decomposer.py b/backend/services/decomposer.py
index 830b47d..29e0642 100644
--- a/backend/services/decomposer.py
+++ b/backend/services/decomposer.py
@@ -1,8 +1,8 @@
# Orchestration Engine - Plan Decomposer
#
-# Converts an approved plan JSON into task rows with dependency edges.
+# Converts an approved plan (XML or JSON) into task rows with dependency edges.
#
-# Depends on: backend/config.py, services/model_router.py
+# Depends on: backend/config.py, services/model_router.py, utils/xml_utils.py
# Used by: routes/projects.py, container.py
import json
@@ -66,7 +66,13 @@ async def decompose(self, project_id: str, plan_id: str) -> dict:
if plan_row["project_id"] != project_id:
raise NotFoundError(f"Plan {plan_id} does not belong to project {project_id}")
- plan_data = json.loads(plan_row["plan_json"])
+ # Prefer XML plan (source of truth) with JSON fallback
+ plan_xml_raw = plan_row["plan_xml"]
+ if plan_xml_raw:
+ from backend.utils.xml_utils import parse_plan_xml
+ plan_data = parse_plan_xml(plan_xml_raw)
+ else:
+ plan_data = json.loads(plan_row["plan_json"])
tasks_data, phase_names = _flatten_plan_tasks(plan_data)
if not tasks_data:
diff --git a/backend/services/planner.py b/backend/services/planner.py
index b436d0f..b962db1 100644
--- a/backend/services/planner.py
+++ b/backend/services/planner.py
@@ -17,6 +17,7 @@
from backend.models.enums import PlanningRigor, PlanStatus, ProjectStatus
from backend.services.model_router import calculate_cost
from backend.utils.json_utils import extract_json_object, parse_requirements
+from backend.utils.xml_utils import extract_xml_plan, parse_plan_xml
logger = logging.getLogger("orchestration.planner")
@@ -71,110 +72,128 @@
"""
-_TASK_SCHEMA = """{
- "title": "Short task title",
- "description": "Detailed description...",
- "task_type": "code|research|analysis|asset|integration|documentation",
- "complexity": "simple|medium|complex",
- "depends_on": [],
- "tools_needed": ["search_knowledge", "lookup_type", "local_llm", "generate_image", "read_file", "write_file"],
- "requirement_ids": ["R1", "R3"],
- "verification_criteria": "How to verify this task was completed correctly",
- "affected_files": ["src/auth.ts", "db/schema.sql"]
- }"""
-
-_RIGOR_SUFFIX_L1 = f"""Produce a JSON plan with this exact structure:
-{{
- "summary": "Brief summary of what will be built",
- "tasks": [
- {_TASK_SCHEMA}
- ]
-}}
-
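+_TASK_SCHEMA_XML = """    <task>
+      <title>Short task title</title>
+      <description>Detailed description of what this task does</description>
+      <task_type>code</task_type>
+      <complexity>medium</complexity>
+      <depends_on></depends_on>
+      <tools_needed>search_knowledge,read_file,write_file</tools_needed>
+      <requirement_ids>R1,R3</requirement_ids>
+      <verification_criteria>How to verify this task was completed correctly</verification_criteria>
+      <affected_files>src/auth.ts,db/schema.sql</affected_files>
+    </task>"""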
+
+_RIGOR_SUFFIX_L1 = f"""Produce an XML plan with this exact structure:
+
+<plan>
+  <summary>Brief summary of what will be built</summary>
+  <tasks>
+{_TASK_SCHEMA_XML}
+  </tasks>
+</plan>
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices (empty if no dependencies)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
- Aim for 3-15 tasks. Too few means tasks are too large; too many means overhead.
-
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
-
-_RIGOR_SUFFIX_L2 = f"""Produce a JSON plan organized into phases. Each phase groups related tasks into a logical stage of work.
-
-{{
- "summary": "Brief summary of what will be built",
- "phases": [
- {{
- "name": "Phase name (e.g. 'Foundation', 'Core Logic', 'Integration')",
- "description": "What this phase accomplishes and why it comes at this point",
- "tasks": [
- {_TASK_SCHEMA}
- ]
- }}
- ],
- "open_questions": [
- {{
- "question": "An ambiguity or decision in the requirements",
- "proposed_answer": "How you propose to handle it",
- "impact": "What changes if the answer differs"
- }}
- ]
-}}
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
+
+Respond with ONLY the XML plan, no markdown fences or explanation."""
+
+_RIGOR_SUFFIX_L2 = f"""Produce an XML plan organized into phases. Each phase groups related tasks into a logical stage of work.
+
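+<plan>
+  <summary>Brief summary of what will be built</summary>
+  <phases>
+    <phase name="Phase name">
+      <description>What this phase accomplishes and why it comes at this point</description>
+{_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+</plan>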
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases (empty if none)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
Phase guidelines:
- Group related tasks into 2-5 phases that represent logical stages of work.
- Name phases clearly: "Research & Discovery", "Core Implementation", "Integration & Testing", etc.
- Earlier phases should have no dependencies on later phases.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
- Aim for 3-15 total tasks across all phases.
Open questions:
- Surface 1-5 ambiguities, assumptions, or decisions that could affect the plan.
-- Each must include a proposed_answer so the user can approve or override quickly.
-
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
-
-_RIGOR_SUFFIX_L3 = f"""Produce a thorough JSON plan organized into phases with risk analysis and test strategy.
-
-{{
- "summary": "Brief summary of what will be built",
- "phases": [
- {{
- "name": "Phase name (e.g. 'Foundation', 'Core Logic', 'Integration')",
- "description": "What this phase accomplishes and why it comes at this point",
- "tasks": [
- {_TASK_SCHEMA}
- ]
- }}
- ],
- "open_questions": [
- {{
- "question": "An ambiguity or decision in the requirements",
- "proposed_answer": "How you propose to handle it",
- "impact": "What changes if the answer differs"
- }}
- ],
- "risk_assessment": [
- {{
- "risk": "Description of a technical or schedule risk",
- "likelihood": "low|medium|high",
- "impact": "low|medium|high",
- "mitigation": "How to reduce or handle this risk"
- }}
- ],
- "test_strategy": {{
- "approach": "Overall testing approach description",
- "test_tasks": ["Task titles that represent test/verification work"],
- "coverage_notes": "What areas need testing and how"
- }}
-}}
+- Each must include a proposed answer so the user can approve or override quickly.
+
+Respond with ONLY the XML plan, no markdown fences or explanation."""
+
+_RIGOR_SUFFIX_L3 = f"""Produce a thorough XML plan organized into phases with risk analysis and test strategy.
+
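+<plan>
+  <summary>Brief summary of what will be built</summary>
+  <phases>
+    <phase name="Phase name">
+      <description>What this phase accomplishes</description>
+{_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+  <risks>
+    <risk>
+      <description>Description of a technical or schedule risk</description>
+      <likelihood>medium</likelihood>
+      <impact>high</impact>
+      <mitigation>How to reduce or handle this risk</mitigation>
+    </risk>
+  </risks>
+  <test_strategy>
+    <approach>Overall testing approach description</approach>
+    <test_tasks>Task title 1,Task title 2</test_tasks>
+    <coverage_notes>What areas need testing and how</coverage_notes>
+  </test_strategy>
+</plan>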
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases (empty if none)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
Phase guidelines:
- Group related tasks into 2-5 phases that represent logical stages of work.
- Name phases clearly: "Research & Discovery", "Core Implementation", "Integration & Testing", etc.
- Earlier phases should have no dependencies on later phases.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
- Aim for 5-15 total tasks across all phases.
Open questions:
- Surface 1-5 ambiguities, assumptions, or decisions that could affect the plan.
-- Each must include a proposed_answer so the user can approve or override quickly.
Risk assessment:
- Identify 2-5 technical, integration, or scope risks.
@@ -185,7 +204,7 @@
- Reference specific tasks that perform testing/verification.
- Note coverage gaps the user should be aware of.
-You may optionally begin your response with a <thinking> block to reason through dependencies, risks, and trade-offs before producing the plan. After your reasoning (if any), output the JSON plan with no markdown fences."""
+You may optionally begin your response with a <thinking> block to reason through the plan before producing it. After your reasoning (if any), output the XML plan."""
_RIGOR_SUFFIXES = {
PlanningRigor.L1: _RIGOR_SUFFIX_L1,
@@ -228,57 +247,63 @@ def _build_system_prompt(rigor: PlanningRigor) -> str:
"""
-_CSHARP_TASK_SCHEMA = """{
- "title": "ClassName.MethodName",
- "description": "What this method does, including behavioral contract and edge cases",
- "task_type": "csharp_method",
- "complexity": "simple|medium|complex",
- "depends_on": [],
- "target_class": "Namespace.ClassName",
- "target_signature": "public async Task MethodName(ParamType param)",
- "available_methods": ["signatures of other methods in the same class or injected services"],
- "constructor_params": ["IDbContext db", "ILogger logger"],
- "requirement_ids": ["R1"],
- "verification_criteria": "How to verify this method works correctly",
- "affected_files": ["src/Services/MyService.cs"]
- }"""
-
-_CSHARP_RIGOR_SUFFIX = f"""Produce a JSON plan organized into phases. Each phase corresponds to one class being modified or created.
-
-{{
- "summary": "Brief summary of the feature being implemented",
- "phases": [
- {{
- "name": "ClassName (e.g. 'UserService', 'OrderValidator')",
- "description": "What this class does and why these methods are needed",
- "tasks": [
- {_CSHARP_TASK_SCHEMA}
- ]
- }}
- ],
- "open_questions": [
- {{
- "question": "An ambiguity or decision in the requirements",
- "proposed_answer": "How you propose to handle it",
- "impact": "What changes if the answer differs"
- }}
- ],
- "assembly_config": {{
- "new_files": ["Paths to new .cs files that need to be created"],
- "modified_files": ["Paths to existing .cs files that will be modified"]
- }}
-}}
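+_CSHARP_TASK_SCHEMA_XML = """    <task>
+      <title>ClassName.MethodName</title>
+      <description>What this method does, including behavioral contract and edge cases</description>
+      <task_type>csharp_method</task_type>
+      <complexity>medium</complexity>
+      <depends_on></depends_on>
+      <target_class>Namespace.ClassName</target_class>
+      <target_signature>public async Task&lt;bool&gt; MethodName(ParamType param)</target_signature>
+      <available_methods>signatures of other methods in the same class</available_methods>
+      <constructor_params>IDbContext db,ILogger logger</constructor_params>
+      <requirement_ids>R1</requirement_ids>
+      <verification_criteria>How to verify this method works correctly</verification_criteria>
+      <affected_files>src/Services/MyService.cs</affected_files>
+    </task>"""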
+
+_CSHARP_RIGOR_SUFFIX = f"""Produce an XML plan organized into phases. Each phase corresponds to one class being modified or created.
+
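+<plan>
+  <summary>Brief summary of the feature being implemented</summary>
+  <phases>
+    <phase name="ClassName">
+      <description>What this class does and why these methods are needed</description>
+{_CSHARP_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+  <assembly_config>
+    <new_files>path/to/NewFile.cs</new_files>
+    <modified_files>path/to/ExistingFile.cs</modified_files>
+  </assembly_config>
+</plan>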
+
+Task field notes:
+- task_type is always csharp_method for method-level tasks
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases
+- target_class: full namespace-qualified class name
+- target_signature: exact method signature (use &lt; &gt; for generics)
+- available_methods: comma-separated signatures of other methods in the class
+- constructor_params: comma-separated injected dependencies
+- Use XML entities for special characters: &lt; &gt; &amp;
Phase guidelines:
- One phase per class. Phase name = class name.
- Within a phase, order tasks so independent methods come first.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
- After all method tasks in a phase, the system will auto-create an assembly task to stitch and build.
Open questions:
- Surface 1-5 ambiguities about the requirements or existing code structure.
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
+Respond with ONLY the XML plan, no markdown fences or explanation."""
def _build_csharp_system_prompt(type_map: str) -> str:
@@ -421,16 +446,23 @@ async def generate(
completion_tokens = response.usage.output_tokens
cost = calculate_cost(PLANNING_MODEL, prompt_tokens, completion_tokens)
- # Parse the plan JSON
- try:
- plan_data = json.loads(response_text)
- except json.JSONDecodeError:
- # Try to extract JSON from the response (in case of markdown fences).
- # Use a balanced-brace approach to find the outermost JSON object,
- # instead of a greedy regex that could match too much.
- plan_data = extract_json_object(response_text)
- if plan_data is None:
- raise PlanParseError("Failed to parse plan JSON from Claude response")
+ # Parse the plan (XML primary, JSON fallback)
+ plan_xml_str = extract_xml_plan(response_text)
+ if plan_xml_str:
+ try:
+ plan_data = parse_plan_xml(plan_xml_str)
+ except Exception as xml_err:
+ logger.warning("XML plan parse failed, trying JSON fallback: %s", xml_err)
+ plan_xml_str = None # Clear so we don't store bad XML
+
+ if not plan_xml_str:
+ # Fallback: try JSON (backward compat or if Claude ignored XML instruction)
+ try:
+ plan_data = json.loads(response_text)
+ except json.JSONDecodeError:
+ plan_data = extract_json_object(response_text)
+ if plan_data is None:
+ raise PlanParseError("Failed to parse plan from Claude response")
except Exception:
# Record actual API spend even if parsing failed — prevents budget leak
@@ -476,10 +508,11 @@ async def generate(
now = time.time()
await db.execute_write(
"INSERT INTO plans (id, project_id, version, model_used, prompt_tokens, "
- "completion_tokens, cost_usd, plan_json, status, created_at) "
- "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ "completion_tokens, cost_usd, plan_json, plan_xml, status, created_at) "
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(plan_id, project_id, version, PLANNING_MODEL, prompt_tokens,
- completion_tokens, cost, json.dumps(plan_data), PlanStatus.DRAFT, now),
+ completion_tokens, cost, json.dumps(plan_data), plan_xml_str,
+ PlanStatus.DRAFT, now),
)
# Record spending and release reservation
diff --git a/backend/utils/xml_utils.py b/backend/utils/xml_utils.py
new file mode 100644
index 0000000..3061516
--- /dev/null
+++ b/backend/utils/xml_utils.py
@@ -0,0 +1,161 @@
+# Orchestration Engine - XML Plan Utilities
+#
+# Extraction and parsing of XML plans from LLM output.
+# Converts XML plan format to dicts matching the existing PlanData shape
+# so downstream code (decomposer, routes, frontend) stays unchanged.
+#
+# Depends on: (none — stdlib only)
+# Used by: services/planner.py, services/decomposer.py, routes/projects.py
+
+import re
+import xml.etree.ElementTree as ET
+
+
+def extract_xml_plan(text: str) -> str | None:
+    """Extract the <plan>...</plan> block from LLM response text.
+
+    Handles markdown fences, preamble text, and <thinking> blocks.
+    Returns the raw XML string (including the <plan> tags) or None.
+ """
+ # Strip markdown fences
+ text = re.sub(r"```(?:xml)?\s*\n?", "", text)
+
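+    # Find the <plan>...</plan> block
+    start = text.find("<plan")
+    if start == -1:
+        return None
+
+    end = text.rfind("</plan>")
+    if end == -1:
+        return None
+
+    return text[start:end + len("</plan>")]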
+
+
+def _text(el: ET.Element | None) -> str:
+ """Get text content of an element, defaulting to empty string."""
+ if el is None:
+ return ""
+ return (el.text or "").strip()
+
+
+def _split_csv(value: str) -> list[str]:
+ """Split a comma-separated string, filtering empty values."""
+ return [v.strip() for v in value.split(",") if v.strip()]
+
+
+def _parse_task(task_el: ET.Element) -> dict:
+    """Convert a <task> element to a dict matching the JSON task schema."""
+ task = {
+ "title": _text(task_el.find("title")),
+ "description": _text(task_el.find("description")),
+ "task_type": _text(task_el.find("task_type")) or "code",
+ "complexity": _text(task_el.find("complexity")) or "medium",
+ "tools_needed": _split_csv(_text(task_el.find("tools_needed"))),
+ "requirement_ids": _split_csv(_text(task_el.find("requirement_ids"))),
+ "verification_criteria": _text(task_el.find("verification_criteria")),
+ "affected_files": _split_csv(_text(task_el.find("affected_files"))),
+ }
+
+ # depends_on: comma-separated integers
+ deps_text = _text(task_el.find("depends_on"))
+ if deps_text:
+ task["depends_on"] = [int(d.strip()) for d in deps_text.split(",") if d.strip()]
+ else:
+ task["depends_on"] = []
+
+ # C# specific fields (optional)
+ for field in ("target_class", "target_signature"):
+ val = _text(task_el.find(field))
+ if val:
+ task[field] = val
+
+ # C# list fields
+ for field in ("available_methods", "constructor_params"):
+ val = _text(task_el.find(field))
+ if val:
+ task[field] = _split_csv(val)
+
+ return task
+
+
+def _parse_question(q_el: ET.Element) -> dict:
+    """Convert a <question> element to a dict."""
+ return {
+ "question": _text(q_el.find("ask")),
+ "proposed_answer": _text(q_el.find("proposed")),
+ "impact": _text(q_el.find("impact")),
+ }
+
+
+def _parse_risk(r_el: ET.Element) -> dict:
+    """Convert a <risk> element to a dict."""
+ return {
+ "risk": _text(r_el.find("description")),
+ "likelihood": _text(r_el.find("likelihood")) or "medium",
+ "impact": _text(r_el.find("impact")) or "medium",
+ "mitigation": _text(r_el.find("mitigation")),
+ }
+
+
+def parse_plan_xml(xml_str: str) -> dict:
+ """Parse an XML plan string into a dict matching the PlanData shape.
+
+ Supports L1 (flat tasks), L2 (phased + questions), L3 (+ risks + test strategy),
+ and C# reflection plans (+ target_class, target_signature, assembly_config).
+
+ Returns a dict identical in structure to what the JSON planner produces,
+ so downstream code (decomposer, routes, frontend) needs no changes.
+ """
+ root = ET.fromstring(xml_str)
+
+ result: dict = {
+ "summary": _text(root.find("summary")),
+ }
+
+    # L1: flat <tasks> container
+ tasks_el = root.find("tasks")
+ if tasks_el is not None:
+ result["tasks"] = [_parse_task(t) for t in tasks_el.findall("task")]
+
+    # L2+: <phases> container
+ phases_el = root.find("phases")
+ if phases_el is not None:
+ phases = []
+ for phase_el in phases_el.findall("phase"):
+ phase = {
+ "name": phase_el.get("name", ""),
+ "description": _text(phase_el.find("description")),
+ "tasks": [_parse_task(t) for t in phase_el.findall("task")],
+ }
+ phases.append(phase)
+ result["phases"] = phases
+
+    # L2+: <questions>
+ questions_el = root.find("questions")
+ if questions_el is not None:
+ result["open_questions"] = [_parse_question(q) for q in questions_el.findall("question")]
+
+    # L3: <risks>
+ risks_el = root.find("risks")
+ if risks_el is not None:
+ result["risk_assessment"] = [_parse_risk(r) for r in risks_el.findall("risk")]
+
+    # L3: <test_strategy>
+ ts_el = root.find("test_strategy")
+ if ts_el is not None:
+ result["test_strategy"] = {
+ "approach": _text(ts_el.find("approach")),
+ "test_tasks": _split_csv(_text(ts_el.find("test_tasks"))),
+ "coverage_notes": _text(ts_el.find("coverage_notes")),
+ }
+
+    # C#: <assembly_config>
+ ac_el = root.find("assembly_config")
+ if ac_el is not None:
+ result["assembly_config"] = {
+ "new_files": _split_csv(_text(ac_el.find("new_files"))),
+ "modified_files": _split_csv(_text(ac_el.find("modified_files"))),
+ }
+
+ return result
diff --git a/tests/unit/test_csharp_planner.py b/tests/unit/test_csharp_planner.py
index 5fbdfbf..446af35 100644
--- a/tests/unit/test_csharp_planner.py
+++ b/tests/unit/test_csharp_planner.py
@@ -30,10 +30,10 @@ def test_includes_csharp_preamble(self):
def test_includes_task_schema(self):
prompt = _build_csharp_system_prompt("types")
- assert "target_signature" in prompt
- assert "target_class" in prompt
- assert "available_methods" in prompt
- assert "constructor_params" in prompt
+        assert "<target_signature>" in prompt
+        assert "<target_class>" in prompt
+        assert "<available_methods>" in prompt
+        assert "<constructor_params>" in prompt
def test_includes_strategy_rules(self):
prompt = _build_csharp_system_prompt("types")
@@ -50,7 +50,7 @@ def test_generic_prompt_unchanged(self):
"""Verify the generic prompt path still works."""
prompt = _build_system_prompt(PlanningRigor.L2)
assert "project planner" in prompt
- assert "reflected_types" not in prompt
+        assert "<reflected_types>" not in prompt
def _make_planner_db_mock(config_json):
@@ -69,8 +69,13 @@ def _make_planner_db_mock(config_json):
return mock_db
-def _make_anthropic_mock(response_text='{"summary": "test", "phases": []}'):
+_DEFAULT_CSHARP_XML = '<plan><summary>test</summary><phases></phases></plan>'
+
+
+def _make_anthropic_mock(response_text=None):
"""Create a mock anthropic module + client."""
+ if response_text is None:
+ response_text = _DEFAULT_CSHARP_XML
mock_anthropic = AsyncMock()
mock_client = AsyncMock()
mock_response = AsyncMock()
@@ -116,7 +121,9 @@ async def test_csharp_strategy_fallback_on_reflection_failure(self):
mock_reflect.return_value = None # Reflection failed
with patch("backend.services.planner.anthropic") as mock_anthropic_mod:
- mock_anthropic, mock_client = _make_anthropic_mock('{"summary": "test", "tasks": []}')
+ mock_anthropic, mock_client = _make_anthropic_mock(
+                    '<plan><summary>test</summary><tasks></tasks></plan>'
+ )
mock_anthropic_mod.AsyncAnthropic.return_value = mock_client
await planner.generate("proj1")
diff --git a/tests/unit/test_planner_service.py b/tests/unit/test_planner_service.py
index 0673337..f406d88 100644
--- a/tests/unit/test_planner_service.py
+++ b/tests/unit/test_planner_service.py
@@ -5,7 +5,6 @@
# Depends on: backend/services/planner.py, backend/db/connection.py
# Used by: pytest
-import json
import time
from unittest.mock import AsyncMock, MagicMock, patch
@@ -56,14 +55,25 @@ def test_json_after_markdown_fence(self):
# TestPlannerServiceGenerate
# ---------------------------------------------------------------------------
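+_DEFAULT_XML_PLAN = """<plan>
+  <summary>Test plan</summary>
+  <tasks>
+    <task>
+      <title>Task 1</title>
+      <description>Do it</description>
+      <task_type>code</task_type>
+      <complexity>simple</complexity>
+      <depends_on></depends_on>
+      <tools_needed></tools_needed>
+    </task>
+  </tasks>
+</plan>"""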
+
+
def _make_plan_response(plan_text=None, pt=100, ct=200):
"""Build a mock Claude response for planning."""
if plan_text is None:
- plan_text = json.dumps({
- "summary": "Test plan",
- "tasks": [{"title": "Task 1", "description": "Do it", "task_type": "code",
- "complexity": "simple", "depends_on": [], "tools_needed": []}],
- })
+ plan_text = _DEFAULT_XML_PLAN
response = MagicMock()
response.content = [MagicMock(text=plan_text, type="text")]
response.usage = MagicMock(input_tokens=pt, output_tokens=ct)
diff --git a/tests/unit/test_planning_rigor.py b/tests/unit/test_planning_rigor.py
index 3a783bd..314555b 100644
--- a/tests/unit/test_planning_rigor.py
+++ b/tests/unit/test_planning_rigor.py
@@ -31,20 +31,20 @@ class TestBuildSystemPrompt:
def test_l1_prompt_contains_flat_tasks(self):
prompt = _build_system_prompt(PlanningRigor.L1)
- assert '"tasks"' in prompt
- assert '"phases"' not in prompt
+        assert "<tasks>" in prompt
+        assert "<phases>" not in prompt
def test_l2_prompt_contains_phases_and_questions(self):
prompt = _build_system_prompt(PlanningRigor.L2)
- assert '"phases"' in prompt
- assert '"open_questions"' in prompt
- assert '"risk_assessment"' not in prompt
+        assert "<phases>" in prompt
+        assert "<questions>" in prompt
+        assert "<risks>" not in prompt
def test_l3_prompt_contains_risk_and_test_strategy(self):
prompt = _build_system_prompt(PlanningRigor.L3)
- assert '"phases"' in prompt
- assert '"risk_assessment"' in prompt
- assert '"test_strategy"' in prompt
+        assert "<phases>" in prompt
+        assert "<risks>" in prompt
+        assert "<test_strategy>" in prompt
def test_all_rigor_levels_have_suffix(self):
for rigor in PlanningRigor:
@@ -161,13 +161,24 @@ def test_global_dependency_indexing_preserved(self):
# PlannerService rigor from project config
# ---------------------------------------------------------------------------
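+_DEFAULT_XML_PLAN = """<plan>
+  <summary>Test plan</summary>
+  <tasks>
+    <task>
+      <title>T1</title>
+      <description>Do it</description>
+      <task_type>code</task_type>
+      <complexity>simple</complexity>
+      <depends_on></depends_on>
+      <tools_needed></tools_needed>
+    </task>
+  </tasks>
+</plan>"""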
+
+
def _make_plan_response(plan_text=None, pt=100, ct=200):
if plan_text is None:
- plan_text = json.dumps({
- "summary": "Test plan",
- "tasks": [{"title": "T1", "description": "Do it", "task_type": "code",
- "complexity": "simple", "depends_on": [], "tools_needed": []}],
- })
+ plan_text = _DEFAULT_XML_PLAN
response = MagicMock()
response.content = [MagicMock(text=plan_text, type="text")]
response.usage = MagicMock(input_tokens=pt, output_tokens=ct)
@@ -206,8 +217,8 @@ async def test_l1_uses_flat_prompt(self, _mock_cost, rigor_db):
call_kwargs = mock_client.messages.create.call_args.kwargs
system = call_kwargs["system"]
+        assert "<tasks>" in system
+        assert "<phases>" not in system
+ assert "" in system
+ assert "" not in system
assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L1]
@patch("backend.services.planner.calculate_cost", return_value=0.01)
@@ -226,8 +237,8 @@ async def test_l3_uses_thorough_prompt(self, _mock_cost, rigor_db):
call_kwargs = mock_client.messages.create.call_args.kwargs
system = call_kwargs["system"]
- assert '"risk_assessment"' in system
- assert '"test_strategy"' in system
+ assert "" in system
+ assert "" in system
assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L3]
@patch("backend.services.planner.calculate_cost", return_value=0.01)
@@ -246,8 +257,8 @@ async def test_missing_rigor_defaults_to_l2(self, _mock_cost, rigor_db):
call_kwargs = mock_client.messages.create.call_args.kwargs
system = call_kwargs["system"]
- assert '"phases"' in system
- assert '"open_questions"' in system
+ assert "" in system
+ assert "" in system
assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L2]
diff --git a/tests/unit/test_xml_utils.py b/tests/unit/test_xml_utils.py
new file mode 100644
index 0000000..ac9add9
--- /dev/null
+++ b/tests/unit/test_xml_utils.py
@@ -0,0 +1,324 @@
+# Orchestration Engine - XML Plan Utilities Tests
+#
+# Tests for extract_xml_plan() and parse_plan_xml() in xml_utils.py.
+#
+# Depends on: backend/utils/xml_utils.py
+# Used by: CI
+
+from backend.utils.xml_utils import extract_xml_plan, parse_plan_xml
+
+
+# --- extract_xml_plan tests ---
+
+
+def test_extract_simple_plan():
+ text = 'Test'
+ assert extract_xml_plan(text) == text
+
+
+def test_extract_with_preamble():
+ text = 'Here is my plan:\n\nS\n\nDone.'
+ result = extract_xml_plan(text)
+ assert result.startswith("")
+
+
+def test_extract_with_markdown_fences():
+ text = '```xml\nS\n```'
+ result = extract_xml_plan(text)
+ assert result is not None
+ assert "S" in result
+
+
+def test_extract_with_thinking_block():
+ text = 'Let me reason...\n\nS'
+ result = extract_xml_plan(text)
+ assert result.startswith("" not in result
+
+
+def test_extract_no_plan_returns_none():
+ assert extract_xml_plan("Just some text with no plan") is None
+
+
+def test_extract_unclosed_plan_returns_none():
+ assert extract_xml_plan('S') is None
+
+
+# --- parse_plan_xml tests ---
+
+
+_L1_XML = """
+ Build a widget
+
+
+ Create widget
+ Build the widget component
+ code
+ medium
+
+ read_file,write_file
+ R1
+ Widget renders
+ src/widget.ts
+
+
+ Test widget
+ Add unit tests
+ code
+ simple
+ 0
+ write_file
+ R1
+ Tests pass
+ tests/widget.test.ts
+
+
+"""
+
+
+def test_parse_l1_flat_tasks():
+ result = parse_plan_xml(_L1_XML)
+ assert result["summary"] == "Build a widget"
+ assert len(result["tasks"]) == 2
+ assert result["tasks"][0]["title"] == "Create widget"
+ assert result["tasks"][0]["task_type"] == "code"
+ assert result["tasks"][0]["depends_on"] == []
+ assert result["tasks"][0]["tools_needed"] == ["read_file", "write_file"]
+ assert result["tasks"][0]["requirement_ids"] == ["R1"]
+ assert result["tasks"][0]["affected_files"] == ["src/widget.ts"]
+ assert result["tasks"][1]["depends_on"] == [0]
+
+
+_L2_XML = """
+ Build auth system
+
+
+ Set up core auth infrastructure
+
+ User model
+ Define user table
+ code
+ medium
+
+ write_file
+ R1
+ Migration runs
+ db/models.py
+
+
+
+ Wire auth into API
+
+ Auth middleware
+ JWT validation
+ code
+ complex
+ 0
+ read_file,write_file
+ R2,R3
+ Auth tests pass
+ src/middleware.py
+
+
+
+
+
+ Use JWT or sessions?
+ JWT for stateless auth
+ Sessions would need Redis
+
+
+"""
+
+
+def test_parse_l2_phased():
+ result = parse_plan_xml(_L2_XML)
+ assert result["summary"] == "Build auth system"
+ assert len(result["phases"]) == 2
+ assert result["phases"][0]["name"] == "Foundation"
+ assert result["phases"][0]["description"] == "Set up core auth infrastructure"
+ assert len(result["phases"][0]["tasks"]) == 1
+ assert result["phases"][1]["tasks"][0]["depends_on"] == [0]
+ assert result["phases"][1]["tasks"][0]["requirement_ids"] == ["R2", "R3"]
+
+
+def test_parse_l2_open_questions():
+ result = parse_plan_xml(_L2_XML)
+ assert len(result["open_questions"]) == 1
+ q = result["open_questions"][0]
+ assert q["question"] == "Use JWT or sessions?"
+ assert q["proposed_answer"] == "JWT for stateless auth"
+ assert q["impact"] == "Sessions would need Redis"
+
+
+_L3_XML = """
+ Payment integration
+
+
+ Payment processing
+
+ Stripe client
+ Wrap Stripe API
+ code
+ complex
+
+ write_file
+ R1
+ API calls succeed
+ src/stripe.py
+
+
+
+
+
+ Which payment provider?
+ Stripe
+ Different SDK
+
+
+
+
+ Stripe rate limits during peak
+ low
+ high
+ Implement retry with exponential backoff
+
+
+
+ Mock Stripe API in tests
+ Stripe client,Payment flow
+ Cover refund edge cases
+
+"""
+
+
+def test_parse_l3_risks():
+ result = parse_plan_xml(_L3_XML)
+ assert len(result["risk_assessment"]) == 1
+ r = result["risk_assessment"][0]
+ assert r["risk"] == "Stripe rate limits during peak"
+ assert r["likelihood"] == "low"
+ assert r["impact"] == "high"
+ assert "backoff" in r["mitigation"]
+
+
+def test_parse_l3_test_strategy():
+ result = parse_plan_xml(_L3_XML)
+ ts = result["test_strategy"]
+ assert ts["approach"] == "Mock Stripe API in tests"
+ assert ts["test_tasks"] == ["Stripe client", "Payment flow"]
+ assert "refund" in ts["coverage_notes"]
+
+
+_CSHARP_XML = """
+ Implement user service
+
+
+ Core user operations
+
+ UserService.GetUser
+ Fetch user by ID
+ csharp_method
+ medium
+
+ MyApp.Services.UserService
+ public async Task<User> GetUser(Guid id)
+ Save(User u),Delete(Guid id)
+ IDbContext db,ILogger logger
+ R1
+ Returns user or throws
+ src/Services/UserService.cs
+
+
+
+
+
+ Use nullable return or exception?
+ Exception for not found
+ Changes caller error handling
+
+
+
+ src/Services/UserService.cs
+ src/DI/Container.cs
+
+"""
+
+
+def test_parse_csharp_plan():
+ result = parse_plan_xml(_CSHARP_XML)
+ task = result["phases"][0]["tasks"][0]
+ assert task["task_type"] == "csharp_method"
+ assert task["target_class"] == "MyApp.Services.UserService"
+ assert "Task" in task["target_signature"] # XML entity decoded
+ assert task["constructor_params"] == ["IDbContext db", "ILogger logger"]
+ assert task["available_methods"] == ["Save(User u)", "Delete(Guid id)"]
+
+
+def test_parse_csharp_assembly_config():
+ result = parse_plan_xml(_CSHARP_XML)
+ ac = result["assembly_config"]
+ assert ac["new_files"] == ["src/Services/UserService.cs"]
+ assert ac["modified_files"] == ["src/DI/Container.cs"]
+
+
+def test_parse_depends_on_multiple():
+ xml = """S
+ TD
+ 0,1
+ """
+ result = parse_plan_xml(xml)
+ assert result["tasks"][0]["depends_on"] == [0, 1]
+
+
+def test_parse_empty_depends_on():
+ xml = """S
+ TD
+
+ """
+ result = parse_plan_xml(xml)
+ assert result["tasks"][0]["depends_on"] == []
+
+
+def test_parse_missing_optional_fields():
+ xml = """S
+ TD
+ """
+ result = parse_plan_xml(xml)
+ t = result["tasks"][0]
+ assert t["tools_needed"] == []
+ assert t["affected_files"] == []
+ assert t["depends_on"] == []
+ assert t["task_type"] == "code"
+ assert t["complexity"] == "medium"
+
+
+def test_parse_xml_entities():
+ """XML entities like < and & are decoded properly."""
+ xml = """S
+ T
+ Use <T> and & operator
+ """
+ result = parse_plan_xml(xml)
+ assert result["tasks"][0]["description"] == "Use and & operator"
+
+
+def test_roundtrip_dict_shape():
+ """Parsed XML dict has the same keys as what JSON planner produces."""
+ result = parse_plan_xml(_L2_XML)
+ # Must have these top-level keys
+ assert "summary" in result
+ assert "phases" in result
+ assert "open_questions" in result
+ # Each phase must have these keys
+ phase = result["phases"][0]
+ assert "name" in phase
+ assert "description" in phase
+ assert "tasks" in phase
+ # Each task must have these keys
+ task = phase["tasks"][0]
+ for key in ("title", "description", "task_type", "complexity",
+ "depends_on", "tools_needed", "requirement_ids",
+ "verification_criteria", "affected_files"):
+ assert key in task, f"Missing key: {key}"