From 2c6132ba563ae81a471dfed1799a5aa31d2e2192 Mon Sep 17 00:00:00 2001
From: JRussas <159085336+JMRussas@users.noreply.github.com>
Date: Fri, 6 Mar 2026 22:12:23 -0500
Subject: [PATCH 1/2] Switch plan generation from JSON to XML format

Claude now produces XML plans which are stored as source of truth in
the new plan_xml column. The decomposer and routes prefer plan_xml
with automatic fallback to plan_json for backward compatibility.

- Add xml_utils.py with extract_xml_plan() and parse_plan_xml()
- Rewrite planner prompt suffixes (L1/L2/L3/C#) to request XML output
- Add plan_xml TEXT column (migration 015, inline schema, models_metadata)
- Update decomposer to parse plan_xml when available
- Update routes to prefer plan_xml in read/clone/export paths
- Add plan_xml field to PlanOut schema
- 18 new XML parsing tests, updated planner test mocks to return XML

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/db/connection.py                      |   1 +
 backend/db/models_metadata.py                 |   1 +
 .../migrations/versions/015_add_plan_xml.py   |  32 ++
 backend/models/schemas.py                     |   3 +-
 backend/routes/projects.py                    |  23 +-
 backend/services/decomposer.py                |  12 +-
 backend/services/planner.py                   | 315 +++++++++--------
 backend/utils/xml_utils.py                    | 161 +++++++++
 tests/unit/test_csharp_planner.py             |  21 +-
 tests/unit/test_planner_service.py            |  22 +-
 tests/unit/test_planning_rigor.py             |  49 ++-
 tests/unit/test_xml_utils.py                  | 324 ++++++++++++++++++
 12 files changed, 781 insertions(+), 183 deletions(-)
 create mode 100644 backend/migrations/versions/015_add_plan_xml.py
 create mode 100644 backend/utils/xml_utils.py
 create mode 100644 tests/unit/test_xml_utils.py

diff --git a/backend/db/connection.py b/backend/db/connection.py
index a42d39b..b429753 100644
--- a/backend/db/connection.py
+++ b/backend/db/connection.py
@@ -59,6 +59,7 @@
     completion_tokens INTEGER NOT NULL DEFAULT 0,
     cost_usd REAL NOT NULL DEFAULT 0.0,
     plan_json TEXT NOT NULL,
+    plan_xml TEXT,
     status TEXT NOT NULL DEFAULT 'draft',
     created_at REAL NOT NULL
 );
diff --git a/backend/db/models_metadata.py b/backend/db/models_metadata.py
index 594e487..219973d 100644
--- a/backend/db/models_metadata.py
+++ b/backend/db/models_metadata.py
@@ -63,6 +63,7 @@
     Column("completion_tokens", Integer, nullable=False, server_default="0"),
     Column("cost_usd", Float, nullable=False, server_default="0.0"),
     Column("plan_json", Text, nullable=False),
+    Column("plan_xml", Text, nullable=True),
     Column("status", Text, nullable=False, server_default="draft"),
     Column("created_at", Float, nullable=False),
 )
diff --git a/backend/migrations/versions/015_add_plan_xml.py b/backend/migrations/versions/015_add_plan_xml.py
new file mode 100644
index 0000000..da03d08
--- /dev/null
+++ b/backend/migrations/versions/015_add_plan_xml.py
@@ -0,0 +1,32 @@
+#  Orchestration Engine - Migration 015
+#
+#  Add plan_xml column to plans table for XML plan storage.
+#  Existing plans remain in plan_json; new plans write plan_json plus plan_xml when XML parsing succeeded.
+#
+#  Depends on: 014_add_api_keys_and_claim_tracking
+#  Used by:    services/planner.py, services/decomposer.py, routes/projects.py
+
+"""Add plan_xml column to plans table.
+
+Revision ID: 015
+Revises: 014
+Create Date: 2026-03-06
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+revision = "015"
+down_revision = "014"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():  # adds the nullable plans.plan_xml TEXT column
+    with op.batch_alter_table("plans") as plans_batch:
+        plans_batch.add_column(sa.Column("plan_xml", sa.Text(), nullable=True))
+
+
+def downgrade():  # removes plans.plan_xml again
+    with op.batch_alter_table("plans") as plans_batch:
+        plans_batch.drop_column("plan_xml")
diff --git a/backend/models/schemas.py b/backend/models/schemas.py
index 3a7b6c8..6e89db7 100644
--- a/backend/models/schemas.py
+++ b/backend/models/schemas.py
@@ -95,7 +95,8 @@ class PlanOut(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     cost_usd: float
-    plan: dict  # The structured plan JSON
+    plan: dict  # The structured plan data (parsed from XML or JSON)
+    plan_xml: str | None = None  # Raw XML plan (if available)
     status: PlanStatus
     created_at: float
 
diff --git a/backend/routes/projects.py b/backend/routes/projects.py
index 0eef113..575a8d3 100644
--- a/backend/routes/projects.py
+++ b/backend/routes/projects.py
@@ -35,6 +35,15 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
+def _parse_plan_from_row(row) -> dict:
+    """Return the row's plan as a dict: parsed plan_xml if set, else plan_json."""
+    xml_source = row["plan_xml"]
+    if not xml_source:
+        return json.loads(row["plan_json"])
+    from backend.utils.xml_utils import parse_plan_xml
+    return parse_plan_xml(xml_source)
+
+
 async def _row_to_project(
     row, db: Database,
     include_task_summary: bool = False,
@@ -298,7 +307,8 @@ async def list_plans(
             prompt_tokens=r["prompt_tokens"],
             completion_tokens=r["completion_tokens"],
             cost_usd=r["cost_usd"],
-            plan=json.loads(r["plan_json"]),
+            plan=_parse_plan_from_row(r),
+            plan_xml=r["plan_xml"],
             status=r["status"],
             created_at=r["created_at"],
         )
@@ -438,9 +448,10 @@ async def clone_project(
             new_plan_id = uuid.uuid4().hex[:12]
             await db.execute_write(
                 "INSERT INTO plans (id, project_id, version, model_used, prompt_tokens, "
-                "completion_tokens, cost_usd, plan_json, status, created_at) "
-                "VALUES (?, ?, 1, ?, 0, 0, 0.0, ?, 'draft', ?)",
-                (new_plan_id, new_project_id, plan_row["model_used"], plan_row["plan_json"], now),
+                "completion_tokens, cost_usd, plan_json, plan_xml, status, created_at) "
+                "VALUES (?, ?, 1, ?, 0, 0, 0.0, ?, ?, 'draft', ?)",
+                (new_plan_id, new_project_id, plan_row["model_used"],
+                 plan_row["plan_json"], plan_row["plan_xml"], now),
             )
 
         # 3. Clone tasks (reset status, clear output/cost/retry)
@@ -512,8 +523,8 @@ async def export_project(
         {
             "id": p["id"], "version": p["version"], "model_used": p["model_used"],
             "prompt_tokens": p["prompt_tokens"], "completion_tokens": p["completion_tokens"],
-            "cost_usd": p["cost_usd"], "plan": json.loads(p["plan_json"]),
-            "status": p["status"], "created_at": p["created_at"],
+            "cost_usd": p["cost_usd"], "plan": _parse_plan_from_row(p),
+            "plan_xml": p["plan_xml"], "status": p["status"], "created_at": p["created_at"],
         }
         for p in plan_rows
     ]
diff --git a/backend/services/decomposer.py b/backend/services/decomposer.py
index 830b47d..29e0642 100644
--- a/backend/services/decomposer.py
+++ b/backend/services/decomposer.py
@@ -1,8 +1,8 @@
 #  Orchestration Engine - Plan Decomposer
 #
-#  Converts an approved plan JSON into task rows with dependency edges.
+#  Converts an approved plan (XML or JSON) into task rows with dependency edges.
 #
-#  Depends on: backend/config.py, services/model_router.py
+#  Depends on: backend/config.py, services/model_router.py, utils/xml_utils.py
 #  Used by:    routes/projects.py, container.py
 
 import json
@@ -66,7 +66,13 @@ async def decompose(self, project_id: str, plan_id: str) -> dict:
         if plan_row["project_id"] != project_id:
             raise NotFoundError(f"Plan {plan_id} does not belong to project {project_id}")
 
-        plan_data = json.loads(plan_row["plan_json"])
+        # Prefer XML plan (source of truth) with JSON fallback
+        plan_xml_raw = plan_row["plan_xml"]
+        if plan_xml_raw:
+            from backend.utils.xml_utils import parse_plan_xml
+            plan_data = parse_plan_xml(plan_xml_raw)
+        else:
+            plan_data = json.loads(plan_row["plan_json"])
         tasks_data, phase_names = _flatten_plan_tasks(plan_data)
 
         if not tasks_data:
diff --git a/backend/services/planner.py b/backend/services/planner.py
index b436d0f..b962db1 100644
--- a/backend/services/planner.py
+++ b/backend/services/planner.py
@@ -17,6 +17,7 @@
 from backend.models.enums import PlanningRigor, PlanStatus, ProjectStatus
 from backend.services.model_router import calculate_cost
 from backend.utils.json_utils import extract_json_object, parse_requirements
+from backend.utils.xml_utils import extract_xml_plan, parse_plan_xml
 
 logger = logging.getLogger("orchestration.planner")
 
@@ -71,110 +72,128 @@
 
 """
 
-_TASK_SCHEMA = """{
-      "title": "Short task title",
-      "description": "Detailed description...",
-      "task_type": "code|research|analysis|asset|integration|documentation",
-      "complexity": "simple|medium|complex",
-      "depends_on": [],
-      "tools_needed": ["search_knowledge", "lookup_type", "local_llm", "generate_image", "read_file", "write_file"],
-      "requirement_ids": ["R1", "R3"],
-      "verification_criteria": "How to verify this task was completed correctly",
-      "affected_files": ["src/auth.ts", "db/schema.sql"]
-    }"""
-
-_RIGOR_SUFFIX_L1 = f"""Produce a JSON plan with this exact structure:
-{{
-  "summary": "Brief summary of what will be built",
-  "tasks": [
-    {_TASK_SCHEMA}
-  ]
-}}
-
+_TASK_SCHEMA_XML = """    <task index="0">
+      <title>Short task title</title>
+      <description>Detailed description of what this task does</description>
+      <task_type>code</task_type>
+      <complexity>medium</complexity>
+      <depends_on></depends_on>
+      <tools_needed>search_knowledge,read_file,write_file</tools_needed>
+      <requirement_ids>R1,R3</requirement_ids>
+      <verification_criteria>How to verify this task was completed correctly</verification_criteria>
+      <affected_files>src/auth.ts,db/schema.sql</affected_files>
+    </task>"""
+
+_RIGOR_SUFFIX_L1 = f"""Produce an XML plan with this exact structure:
+
+<plan level="L1">
+  <summary>Brief summary of what will be built</summary>
+  <tasks>
+{_TASK_SCHEMA_XML}
+  </tasks>
+</plan>
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices (empty if no dependencies)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
 - Aim for 3-15 tasks. Too few means tasks are too large; too many means overhead.
-
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
-
-_RIGOR_SUFFIX_L2 = f"""Produce a JSON plan organized into phases. Each phase groups related tasks into a logical stage of work.
-
-{{
-  "summary": "Brief summary of what will be built",
-  "phases": [
-    {{
-      "name": "Phase name (e.g. 'Foundation', 'Core Logic', 'Integration')",
-      "description": "What this phase accomplishes and why it comes at this point",
-      "tasks": [
-        {_TASK_SCHEMA}
-      ]
-    }}
-  ],
-  "open_questions": [
-    {{
-      "question": "An ambiguity or decision in the requirements",
-      "proposed_answer": "How you propose to handle it",
-      "impact": "What changes if the answer differs"
-    }}
-  ]
-}}
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
+
+Respond with ONLY the XML plan, no markdown fences or explanation."""
+
+_RIGOR_SUFFIX_L2 = f"""Produce an XML plan organized into phases. Each phase groups related tasks into a logical stage of work.
+
+<plan level="L2">
+  <summary>Brief summary of what will be built</summary>
+  <phases>
+    <phase name="Foundation">
+      <description>What this phase accomplishes and why it comes at this point</description>
+{_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+</plan>
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases (empty if none)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
 
 Phase guidelines:
 - Group related tasks into 2-5 phases that represent logical stages of work.
 - Name phases clearly: "Research & Discovery", "Core Implementation", "Integration & Testing", etc.
 - Earlier phases should have no dependencies on later phases.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
 - Aim for 3-15 total tasks across all phases.
 
 Open questions:
 - Surface 1-5 ambiguities, assumptions, or decisions that could affect the plan.
-- Each must include a proposed_answer so the user can approve or override quickly.
-
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
-
-_RIGOR_SUFFIX_L3 = f"""Produce a thorough JSON plan organized into phases with risk analysis and test strategy.
-
-{{
-  "summary": "Brief summary of what will be built",
-  "phases": [
-    {{
-      "name": "Phase name (e.g. 'Foundation', 'Core Logic', 'Integration')",
-      "description": "What this phase accomplishes and why it comes at this point",
-      "tasks": [
-        {_TASK_SCHEMA}
-      ]
-    }}
-  ],
-  "open_questions": [
-    {{
-      "question": "An ambiguity or decision in the requirements",
-      "proposed_answer": "How you propose to handle it",
-      "impact": "What changes if the answer differs"
-    }}
-  ],
-  "risk_assessment": [
-    {{
-      "risk": "Description of a technical or schedule risk",
-      "likelihood": "low|medium|high",
-      "impact": "low|medium|high",
-      "mitigation": "How to reduce or handle this risk"
-    }}
-  ],
-  "test_strategy": {{
-    "approach": "Overall testing approach description",
-    "test_tasks": ["Task titles that represent test/verification work"],
-    "coverage_notes": "What areas need testing and how"
-  }}
-}}
+- Each must include a proposed answer so the user can approve or override quickly.
+
+Respond with ONLY the XML plan, no markdown fences or explanation."""
+
+_RIGOR_SUFFIX_L3 = f"""Produce a thorough XML plan organized into phases with risk analysis and test strategy.
+
+<plan level="L3">
+  <summary>Brief summary of what will be built</summary>
+  <phases>
+    <phase name="Foundation">
+      <description>What this phase accomplishes</description>
+{_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+  <risks>
+    <risk>
+      <description>Description of a technical or schedule risk</description>
+      <likelihood>medium</likelihood>
+      <impact>high</impact>
+      <mitigation>How to reduce or handle this risk</mitigation>
+    </risk>
+  </risks>
+  <test_strategy>
+    <approach>Overall testing approach description</approach>
+    <test_tasks>Task title 1,Task title 2</test_tasks>
+    <coverage_notes>What areas need testing and how</coverage_notes>
+  </test_strategy>
+</plan>
+
+Task field notes:
+- task_type: code|research|analysis|asset|integration|documentation
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases (empty if none)
+- tools_needed: comma-separated from: search_knowledge, lookup_type, local_llm, generate_image, read_file, write_file
+- requirement_ids: comma-separated (e.g. R1,R3)
+- affected_files: comma-separated file paths
+- Use XML entities for special characters in descriptions: &lt; &gt; &amp;
 
 Phase guidelines:
 - Group related tasks into 2-5 phases that represent logical stages of work.
 - Name phases clearly: "Research & Discovery", "Core Implementation", "Integration & Testing", etc.
 - Earlier phases should have no dependencies on later phases.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
 - Aim for 5-15 total tasks across all phases.
 
 Open questions:
 - Surface 1-5 ambiguities, assumptions, or decisions that could affect the plan.
-- Each must include a proposed_answer so the user can approve or override quickly.
 
 Risk assessment:
 - Identify 2-5 technical, integration, or scope risks.
@@ -185,7 +204,7 @@
 - Reference specific tasks that perform testing/verification.
 - Note coverage gaps the user should be aware of.
 
-You may optionally begin your response with a <thinking> block to reason through dependencies, risks, and trade-offs before producing the plan. After your reasoning (if any), output the JSON plan with no markdown fences."""
+You may optionally begin your response with a <thinking> block to reason through the plan before producing it. After your reasoning (if any), output the XML plan."""
 
 _RIGOR_SUFFIXES = {
     PlanningRigor.L1: _RIGOR_SUFFIX_L1,
@@ -228,57 +247,63 @@ def _build_system_prompt(rigor: PlanningRigor) -> str:
 
 """
 
-_CSHARP_TASK_SCHEMA = """{
-      "title": "ClassName.MethodName",
-      "description": "What this method does, including behavioral contract and edge cases",
-      "task_type": "csharp_method",
-      "complexity": "simple|medium|complex",
-      "depends_on": [],
-      "target_class": "Namespace.ClassName",
-      "target_signature": "public async Task<bool> MethodName(ParamType param)",
-      "available_methods": ["signatures of other methods in the same class or injected services"],
-      "constructor_params": ["IDbContext db", "ILogger logger"],
-      "requirement_ids": ["R1"],
-      "verification_criteria": "How to verify this method works correctly",
-      "affected_files": ["src/Services/MyService.cs"]
-    }"""
-
-_CSHARP_RIGOR_SUFFIX = f"""Produce a JSON plan organized into phases. Each phase corresponds to one class being modified or created.
-
-{{
-  "summary": "Brief summary of the feature being implemented",
-  "phases": [
-    {{
-      "name": "ClassName (e.g. 'UserService', 'OrderValidator')",
-      "description": "What this class does and why these methods are needed",
-      "tasks": [
-        {_CSHARP_TASK_SCHEMA}
-      ]
-    }}
-  ],
-  "open_questions": [
-    {{
-      "question": "An ambiguity or decision in the requirements",
-      "proposed_answer": "How you propose to handle it",
-      "impact": "What changes if the answer differs"
-    }}
-  ],
-  "assembly_config": {{
-    "new_files": ["Paths to new .cs files that need to be created"],
-    "modified_files": ["Paths to existing .cs files that will be modified"]
-  }}
-}}
+_CSHARP_TASK_SCHEMA_XML = """    <task index="0">
+      <title>ClassName.MethodName</title>
+      <description>What this method does, including behavioral contract and edge cases</description>
+      <task_type>csharp_method</task_type>
+      <complexity>medium</complexity>
+      <depends_on></depends_on>
+      <target_class>Namespace.ClassName</target_class>
+      <target_signature>public async Task&lt;bool&gt; MethodName(ParamType param)</target_signature>
+      <available_methods>signatures of other methods in the same class</available_methods>
+      <constructor_params>IDbContext db,ILogger logger</constructor_params>
+      <requirement_ids>R1</requirement_ids>
+      <verification_criteria>How to verify this method works correctly</verification_criteria>
+      <affected_files>src/Services/MyService.cs</affected_files>
+    </task>"""
+
+_CSHARP_RIGOR_SUFFIX = f"""Produce an XML plan organized into phases. Each phase corresponds to one class being modified or created.
+
+<plan level="csharp">
+  <summary>Brief summary of the feature being implemented</summary>
+  <phases>
+    <phase name="ClassName">
+      <description>What this class does and why these methods are needed</description>
+{_CSHARP_TASK_SCHEMA_XML}
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>An ambiguity or decision in the requirements</ask>
+      <proposed>How you propose to handle it</proposed>
+      <impact>What changes if the answer differs</impact>
+    </question>
+  </questions>
+  <assembly_config>
+    <new_files>path/to/NewFile.cs</new_files>
+    <modified_files>path/to/ExistingFile.cs</modified_files>
+  </assembly_config>
+</plan>
+
+Task field notes:
+- task_type is always csharp_method for method-level tasks
+- complexity: simple|medium|complex
+- depends_on: comma-separated 0-based task indices, GLOBAL across all phases
+- target_class: full namespace-qualified class name
+- target_signature: exact method signature (use &lt; &gt; for generics)
+- available_methods: comma-separated signatures of other methods in the class
+- constructor_params: comma-separated injected dependencies
+- Use XML entities for special characters: &lt; &gt; &amp;
 
 Phase guidelines:
 - One phase per class. Phase name = class name.
 - Within a phase, order tasks so independent methods come first.
-- depends_on indices are GLOBAL across all phases (0-based from the first task in the first phase).
 - After all method tasks in a phase, the system will auto-create an assembly task to stitch and build.
 
 Open questions:
 - Surface 1-5 ambiguities about the requirements or existing code structure.
 
-Respond with ONLY the JSON plan, no markdown fences or explanation."""
+Respond with ONLY the XML plan, no markdown fences or explanation."""
 
 
 def _build_csharp_system_prompt(type_map: str) -> str:
@@ -421,16 +446,23 @@ async def generate(
             completion_tokens = response.usage.output_tokens
             cost = calculate_cost(PLANNING_MODEL, prompt_tokens, completion_tokens)
 
-            # Parse the plan JSON
-            try:
-                plan_data = json.loads(response_text)
-            except json.JSONDecodeError:
-                # Try to extract JSON from the response (in case of markdown fences).
-                # Use a balanced-brace approach to find the outermost JSON object,
-                # instead of a greedy regex that could match too much.
-                plan_data = extract_json_object(response_text)
-                if plan_data is None:
-                    raise PlanParseError("Failed to parse plan JSON from Claude response")
+            # Parse the plan (XML primary, JSON fallback)
+            plan_xml_str = extract_xml_plan(response_text)
+            if plan_xml_str:
+                try:
+                    plan_data = parse_plan_xml(plan_xml_str)
+                except Exception as xml_err:
+                    logger.warning("XML plan parse failed, trying JSON fallback: %s", xml_err)
+                    plan_xml_str = None  # Clear so we don't store bad XML
+
+            if not plan_xml_str:
+                # Fallback: try JSON (backward compat or if Claude ignored XML instruction)
+                try:
+                    plan_data = json.loads(response_text)
+                except json.JSONDecodeError:
+                    plan_data = extract_json_object(response_text)
+                    if plan_data is None:
+                        raise PlanParseError("Failed to parse plan from Claude response")
 
         except Exception:
             # Record actual API spend even if parsing failed — prevents budget leak
@@ -476,10 +508,11 @@ async def generate(
         now = time.time()
         await db.execute_write(
             "INSERT INTO plans (id, project_id, version, model_used, prompt_tokens, "
-            "completion_tokens, cost_usd, plan_json, status, created_at) "
-            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            "completion_tokens, cost_usd, plan_json, plan_xml, status, created_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
             (plan_id, project_id, version, PLANNING_MODEL, prompt_tokens,
-             completion_tokens, cost, json.dumps(plan_data), PlanStatus.DRAFT, now),
+             completion_tokens, cost, json.dumps(plan_data), plan_xml_str,
+             PlanStatus.DRAFT, now),
         )
 
         # Record spending and release reservation
diff --git a/backend/utils/xml_utils.py b/backend/utils/xml_utils.py
new file mode 100644
index 0000000..3061516
--- /dev/null
+++ b/backend/utils/xml_utils.py
@@ -0,0 +1,161 @@
+#  Orchestration Engine - XML Plan Utilities
+#
+#  Extraction and parsing of XML plans from LLM output.
+#  Converts XML plan format to dicts matching the existing PlanData shape
+#  so downstream code (decomposer, routes, frontend) stays unchanged.
+#
+#  Depends on: (none — stdlib only)
+#  Used by:    services/planner.py, services/decomposer.py, routes/projects.py
+
+import re
+import xml.etree.ElementTree as ET
+
+
+def extract_xml_plan(text: str) -> str | None:
+    """Extract the <plan>...</plan> block from LLM response text.
+
+    Strips <thinking> blocks and markdown fences first, so tags quoted in the
+    reasoning are never matched. Returns the raw XML (with <plan> tags) or None.
+    """
+    # Drop reasoning before searching; it may mention <plan> or </plan> itself
+    text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL)
+    text = re.sub(r"```(?:xml)?\s*\n?", "", text)
+
+    # Require whitespace or '>' after the tag name so <planning> is not matched
+    opening = re.search(r"<plan[\s>]", text)
+    if opening is None:
+        return None
+    # Look for the closing tag only after the opening tag
+    end = text.find("</plan>", opening.start())
+    if end == -1:
+        return None
+    return text[opening.start():end + len("</plan>")]
+
+
+def _text(el: ET.Element | None) -> str:
+    """Return the element's stripped leading text, or "" when absent."""
+    if el is not None and el.text:
+        return el.text.strip()
+    return ""
+
+
+def _split_csv(value: str) -> list[str]:
+    """Return the non-blank, whitespace-trimmed items of a comma-separated string."""
+    return [item for item in map(str.strip, value.split(",")) if item]
+
+
+def _parse_task(task_el: ET.Element) -> dict:
+    """Build the JSON-schema-shaped task dict from one <task> element."""
+
+    def child(tag: str) -> str:
+        # Stripped text of the named direct child ("" when missing)
+        return _text(task_el.find(tag))
+
+    parsed = {
+        "title": child("title"),
+        "description": child("description"),
+        "task_type": child("task_type") or "code",
+        "complexity": child("complexity") or "medium",
+        "tools_needed": _split_csv(child("tools_needed")),
+        "requirement_ids": _split_csv(child("requirement_ids")),
+        "verification_criteria": child("verification_criteria"),
+        "affected_files": _split_csv(child("affected_files")),
+        # Comma-separated 0-based indices; int() raises ValueError on junk
+        "depends_on": [int(dep) for dep in _split_csv(child("depends_on"))],
+    }
+
+    # Optional C# fields: scalars stay strings, list fields are CSV-split;
+    # any field that is absent or blank is left out of the dict entirely
+    csharp_fields = (
+        ("target_class", str),
+        ("target_signature", str),
+        ("available_methods", _split_csv),
+        ("constructor_params", _split_csv),
+    )
+    for tag, convert in csharp_fields:
+        raw = child(tag)
+        if raw:
+            parsed[tag] = convert(raw)
+    return parsed
+
+
+def _parse_question(q_el: ET.Element) -> dict:
+    """Turn a <question> element into an open-question dict.
+
+    Missing or empty children yield "" for their key.
+    """
+    key_to_tag = (("question", "ask"), ("proposed_answer", "proposed"), ("impact", "impact"))
+    return {key: _text(q_el.find(tag)) for key, tag in key_to_tag}
+
+
+def _parse_risk(r_el: ET.Element) -> dict:
+    """Turn a <risk> element into a risk dict; blank levels default to "medium"."""
+    def child(tag: str) -> str:
+        return _text(r_el.find(tag))
+
+    levels = {key: child(key) or "medium" for key in ("likelihood", "impact")}
+    # Spread keeps the key order risk, likelihood, impact, mitigation
+    return {"risk": child("description"), **levels, "mitigation": child("mitigation")}
+
+
+def parse_plan_xml(xml_str: str) -> dict:
+    """Parse an XML plan string into a dict matching the PlanData shape.
+
+    Handles every plan level: L1 (flat <tasks>), L2 (<phases> + <questions>),
+    L3 (adds <risks> and <test_strategy>) and C# plans (<assembly_config>).
+    A key is only present in the result when its XML section exists; only
+    "summary" is always set.
+    Raises xml.etree.ElementTree.ParseError if xml_str is not well-formed.
+    """
+    root = ET.fromstring(xml_str)
+    plan: dict = {"summary": _text(root.find("summary"))}
+
+    def parse_children(container: ET.Element, tag: str, parser) -> list[dict]:
+        # Apply parser to each direct child of container named tag
+        return [parser(child) for child in container.findall(tag)]
+
+    # L1: flat <tasks> container
+    flat_tasks = root.find("tasks")
+    if flat_tasks is not None:
+        plan["tasks"] = parse_children(flat_tasks, "task", _parse_task)
+
+    # L2+: <phases>; each phase's tasks are its direct <task> children
+    phase_list = root.find("phases")
+    if phase_list is not None:
+        plan["phases"] = [
+            {
+                "name": phase.get("name", ""),
+                "description": _text(phase.find("description")),
+                "tasks": parse_children(phase, "task", _parse_task),
+            }
+            for phase in phase_list.findall("phase")
+        ]
+
+    # L2+: <questions> -> "open_questions"
+    question_list = root.find("questions")
+    if question_list is not None:
+        plan["open_questions"] = parse_children(question_list, "question", _parse_question)
+
+    # L3: <risks> -> "risk_assessment"
+    risk_list = root.find("risks")
+    if risk_list is not None:
+        plan["risk_assessment"] = parse_children(risk_list, "risk", _parse_risk)
+
+    # L3: <test_strategy>; test_tasks is a comma-separated list of titles
+    strategy = root.find("test_strategy")
+    if strategy is not None:
+        plan["test_strategy"] = {
+            "approach": _text(strategy.find("approach")),
+            "test_tasks": _split_csv(_text(strategy.find("test_tasks"))),
+            "coverage_notes": _text(strategy.find("coverage_notes")),
+        }
+
+    # C#: <assembly_config>; both children are comma-separated path lists
+    assembly = root.find("assembly_config")
+    if assembly is not None:
+        plan["assembly_config"] = {
+            key: _split_csv(_text(assembly.find(key)))
+            for key in ("new_files", "modified_files")
+        }
+
+    return plan
diff --git a/tests/unit/test_csharp_planner.py b/tests/unit/test_csharp_planner.py
index 5fbdfbf..446af35 100644
--- a/tests/unit/test_csharp_planner.py
+++ b/tests/unit/test_csharp_planner.py
@@ -30,10 +30,10 @@ def test_includes_csharp_preamble(self):
 
     def test_includes_task_schema(self):
         prompt = _build_csharp_system_prompt("types")
-        assert "target_signature" in prompt
-        assert "target_class" in prompt
-        assert "available_methods" in prompt
-        assert "constructor_params" in prompt
+        assert "<target_signature>" in prompt
+        assert "<target_class>" in prompt
+        assert "<available_methods>" in prompt
+        assert "<constructor_params>" in prompt
 
     def test_includes_strategy_rules(self):
         prompt = _build_csharp_system_prompt("types")
@@ -50,7 +50,7 @@ def test_generic_prompt_unchanged(self):
         """Verify the generic prompt path still works."""
         prompt = _build_system_prompt(PlanningRigor.L2)
         assert "project planner" in prompt
-        assert "reflected_types" not in prompt
+        assert "<reflected_types>" not in prompt
 
 
 def _make_planner_db_mock(config_json):
@@ -69,8 +69,13 @@ def _make_planner_db_mock(config_json):
     return mock_db
 
 
-def _make_anthropic_mock(response_text='{"summary": "test", "phases": []}'):
+_DEFAULT_CSHARP_XML = '<plan level="csharp"><summary>test</summary><phases></phases></plan>'
+
+
+def _make_anthropic_mock(response_text=None):
     """Create a mock anthropic module + client."""
+    if response_text is None:
+        response_text = _DEFAULT_CSHARP_XML
     mock_anthropic = AsyncMock()
     mock_client = AsyncMock()
     mock_response = AsyncMock()
@@ -116,7 +121,9 @@ async def test_csharp_strategy_fallback_on_reflection_failure(self):
             mock_reflect.return_value = None  # Reflection failed
 
             with patch("backend.services.planner.anthropic") as mock_anthropic_mod:
-                mock_anthropic, mock_client = _make_anthropic_mock('{"summary": "test", "tasks": []}')
+                mock_anthropic, mock_client = _make_anthropic_mock(
+                    '<plan level="L2"><summary>test</summary><phases></phases></plan>'
+                )
                 mock_anthropic_mod.AsyncAnthropic.return_value = mock_client
 
                 await planner.generate("proj1")
diff --git a/tests/unit/test_planner_service.py b/tests/unit/test_planner_service.py
index 0673337..f406d88 100644
--- a/tests/unit/test_planner_service.py
+++ b/tests/unit/test_planner_service.py
@@ -5,7 +5,6 @@
 #  Depends on: backend/services/planner.py, backend/db/connection.py
 #  Used by:    pytest
 
-import json
 import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -56,14 +55,25 @@ def test_json_after_markdown_fence(self):
 # TestPlannerServiceGenerate
 # ---------------------------------------------------------------------------
 
+_DEFAULT_XML_PLAN = """<plan level="L1">
+  <summary>Test plan</summary>
+  <tasks>
+    <task index="0">
+      <title>Task 1</title>
+      <description>Do it</description>
+      <task_type>code</task_type>
+      <complexity>simple</complexity>
+      <depends_on></depends_on>
+      <tools_needed></tools_needed>
+    </task>
+  </tasks>
+</plan>"""
+
+
 def _make_plan_response(plan_text=None, pt=100, ct=200):
     """Build a mock Claude response for planning."""
     if plan_text is None:
-        plan_text = json.dumps({
-            "summary": "Test plan",
-            "tasks": [{"title": "Task 1", "description": "Do it", "task_type": "code",
-                        "complexity": "simple", "depends_on": [], "tools_needed": []}],
-        })
+        plan_text = _DEFAULT_XML_PLAN
     response = MagicMock()
     response.content = [MagicMock(text=plan_text, type="text")]
     response.usage = MagicMock(input_tokens=pt, output_tokens=ct)
diff --git a/tests/unit/test_planning_rigor.py b/tests/unit/test_planning_rigor.py
index 3a783bd..314555b 100644
--- a/tests/unit/test_planning_rigor.py
+++ b/tests/unit/test_planning_rigor.py
@@ -31,20 +31,20 @@ class TestBuildSystemPrompt:
 
     def test_l1_prompt_contains_flat_tasks(self):
         prompt = _build_system_prompt(PlanningRigor.L1)
-        assert '"tasks"' in prompt
-        assert '"phases"' not in prompt
+        assert "<tasks>" in prompt
+        assert "<phases>" not in prompt
 
     def test_l2_prompt_contains_phases_and_questions(self):
         prompt = _build_system_prompt(PlanningRigor.L2)
-        assert '"phases"' in prompt
-        assert '"open_questions"' in prompt
-        assert '"risk_assessment"' not in prompt
+        assert "<phases>" in prompt
+        assert "<questions>" in prompt
+        assert "<risks>" not in prompt
 
     def test_l3_prompt_contains_risk_and_test_strategy(self):
         prompt = _build_system_prompt(PlanningRigor.L3)
-        assert '"phases"' in prompt
-        assert '"risk_assessment"' in prompt
-        assert '"test_strategy"' in prompt
+        assert "<phases>" in prompt
+        assert "<risks>" in prompt
+        assert "<test_strategy>" in prompt
 
     def test_all_rigor_levels_have_suffix(self):
         for rigor in PlanningRigor:
@@ -161,13 +161,24 @@ def test_global_dependency_indexing_preserved(self):
 # PlannerService rigor from project config
 # ---------------------------------------------------------------------------
 
+_DEFAULT_XML_PLAN = """<plan level="L1">
+  <summary>Test plan</summary>
+  <tasks>
+    <task index="0">
+      <title>T1</title>
+      <description>Do it</description>
+      <task_type>code</task_type>
+      <complexity>simple</complexity>
+      <depends_on></depends_on>
+      <tools_needed></tools_needed>
+    </task>
+  </tasks>
+</plan>"""
+
+
 def _make_plan_response(plan_text=None, pt=100, ct=200):
     if plan_text is None:
-        plan_text = json.dumps({
-            "summary": "Test plan",
-            "tasks": [{"title": "T1", "description": "Do it", "task_type": "code",
-                        "complexity": "simple", "depends_on": [], "tools_needed": []}],
-        })
+        plan_text = _DEFAULT_XML_PLAN
     response = MagicMock()
     response.content = [MagicMock(text=plan_text, type="text")]
     response.usage = MagicMock(input_tokens=pt, output_tokens=ct)
@@ -206,8 +217,8 @@ async def test_l1_uses_flat_prompt(self, _mock_cost, rigor_db):
 
         call_kwargs = mock_client.messages.create.call_args.kwargs
         system = call_kwargs["system"]
-        assert '"tasks"' in system
-        assert '"phases"' not in system
+        assert "<tasks>" in system
+        assert "<phases>" not in system
         assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L1]
 
     @patch("backend.services.planner.calculate_cost", return_value=0.01)
@@ -226,8 +237,8 @@ async def test_l3_uses_thorough_prompt(self, _mock_cost, rigor_db):
 
         call_kwargs = mock_client.messages.create.call_args.kwargs
         system = call_kwargs["system"]
-        assert '"risk_assessment"' in system
-        assert '"test_strategy"' in system
+        assert "<risks>" in system
+        assert "<test_strategy>" in system
         assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L3]
 
     @patch("backend.services.planner.calculate_cost", return_value=0.01)
@@ -246,8 +257,8 @@ async def test_missing_rigor_defaults_to_l2(self, _mock_cost, rigor_db):
 
         call_kwargs = mock_client.messages.create.call_args.kwargs
         system = call_kwargs["system"]
-        assert '"phases"' in system
-        assert '"open_questions"' in system
+        assert "<phases>" in system
+        assert "<questions>" in system
         assert call_kwargs["max_tokens"] == _MAX_TOKENS_BY_RIGOR[PlanningRigor.L2]
 
 
diff --git a/tests/unit/test_xml_utils.py b/tests/unit/test_xml_utils.py
new file mode 100644
index 0000000..ac9add9
--- /dev/null
+++ b/tests/unit/test_xml_utils.py
@@ -0,0 +1,324 @@
+#  Orchestration Engine - XML Plan Utilities Tests
+#
+#  Tests for extract_xml_plan() and parse_plan_xml() in xml_utils.py.
+#
+#  Depends on: backend/utils/xml_utils.py
+#  Used by:    CI
+
+from backend.utils.xml_utils import extract_xml_plan, parse_plan_xml
+
+
+# --- extract_xml_plan tests ---
+
+
+def test_extract_simple_plan():
+    text = '<plan level="L1"><summary>Test</summary></plan>'
+    assert extract_xml_plan(text) == text
+
+
+def test_extract_with_preamble():
+    text = 'Here is my plan:\n\n<plan level="L2"><summary>S</summary></plan>\n\nDone.'
+    result = extract_xml_plan(text)
+    assert result is not None and result.startswith("<plan")  # leading prose dropped
+    assert result.endswith("</plan>")  # trailing prose dropped
+
+
+def test_extract_with_markdown_fences():
+    text = '```xml\n<plan level="L1"><summary>S</summary></plan>\n```'
+    result = extract_xml_plan(text)
+    assert result is not None
+    assert "<summary>S</summary>" in result
+
+
+def test_extract_with_thinking_block():
+    text = '<thinking>Let me reason...</thinking>\n\n<plan level="L2"><summary>S</summary></plan>'
+    result = extract_xml_plan(text)
+    assert result is not None and result.startswith("<plan")  # None: nothing extracted
+    assert "<thinking>" not in result
+
+
+def test_extract_no_plan_returns_none():
+    assert extract_xml_plan("Just some text with no plan") is None
+
+
+def test_extract_unclosed_plan_returns_none():
+    assert extract_xml_plan('<plan level="L1"><summary>S</summary>') is None
+
+
+# --- parse_plan_xml tests ---
+
+
+_L1_XML = """<plan level="L1">
+  <summary>Build a widget</summary>
+  <tasks>
+    <task index="0">
+      <title>Create widget</title>
+      <description>Build the widget component</description>
+      <task_type>code</task_type>
+      <complexity>medium</complexity>
+      <depends_on></depends_on>
+      <tools_needed>read_file,write_file</tools_needed>
+      <requirement_ids>R1</requirement_ids>
+      <verification_criteria>Widget renders</verification_criteria>
+      <affected_files>src/widget.ts</affected_files>
+    </task>
+    <task index="1">
+      <title>Test widget</title>
+      <description>Add unit tests</description>
+      <task_type>code</task_type>
+      <complexity>simple</complexity>
+      <depends_on>0</depends_on>
+      <tools_needed>write_file</tools_needed>
+      <requirement_ids>R1</requirement_ids>
+      <verification_criteria>Tests pass</verification_criteria>
+      <affected_files>tests/widget.test.ts</affected_files>
+    </task>
+  </tasks>
+</plan>"""
+
+
+def test_parse_l1_flat_tasks():
+    result = parse_plan_xml(_L1_XML)
+    assert result["summary"] == "Build a widget"
+    assert len(result["tasks"]) == 2
+    assert result["tasks"][0]["title"] == "Create widget"
+    assert result["tasks"][0]["task_type"] == "code"
+    assert result["tasks"][0]["depends_on"] == []
+    assert result["tasks"][0]["tools_needed"] == ["read_file", "write_file"]
+    assert result["tasks"][0]["requirement_ids"] == ["R1"]
+    assert result["tasks"][0]["affected_files"] == ["src/widget.ts"]
+    assert result["tasks"][1]["depends_on"] == [0]
+
+
+_L2_XML = """<plan level="L2">
+  <summary>Build auth system</summary>
+  <phases>
+    <phase name="Foundation">
+      <description>Set up core auth infrastructure</description>
+      <task index="0">
+        <title>User model</title>
+        <description>Define user table</description>
+        <task_type>code</task_type>
+        <complexity>medium</complexity>
+        <depends_on></depends_on>
+        <tools_needed>write_file</tools_needed>
+        <requirement_ids>R1</requirement_ids>
+        <verification_criteria>Migration runs</verification_criteria>
+        <affected_files>db/models.py</affected_files>
+      </task>
+    </phase>
+    <phase name="Integration">
+      <description>Wire auth into API</description>
+      <task index="1">
+        <title>Auth middleware</title>
+        <description>JWT validation</description>
+        <task_type>code</task_type>
+        <complexity>complex</complexity>
+        <depends_on>0</depends_on>
+        <tools_needed>read_file,write_file</tools_needed>
+        <requirement_ids>R2,R3</requirement_ids>
+        <verification_criteria>Auth tests pass</verification_criteria>
+        <affected_files>src/middleware.py</affected_files>
+      </task>
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>Use JWT or sessions?</ask>
+      <proposed>JWT for stateless auth</proposed>
+      <impact>Sessions would need Redis</impact>
+    </question>
+  </questions>
+</plan>"""
+
+
+def test_parse_l2_phased():
+    result = parse_plan_xml(_L2_XML)
+    assert result["summary"] == "Build auth system"
+    assert len(result["phases"]) == 2
+    assert result["phases"][0]["name"] == "Foundation"
+    assert result["phases"][0]["description"] == "Set up core auth infrastructure"
+    assert len(result["phases"][0]["tasks"]) == 1
+    assert result["phases"][1]["tasks"][0]["depends_on"] == [0]
+    assert result["phases"][1]["tasks"][0]["requirement_ids"] == ["R2", "R3"]
+
+
+def test_parse_l2_open_questions():
+    result = parse_plan_xml(_L2_XML)
+    assert len(result["open_questions"]) == 1
+    q = result["open_questions"][0]
+    assert q["question"] == "Use JWT or sessions?"
+    assert q["proposed_answer"] == "JWT for stateless auth"
+    assert q["impact"] == "Sessions would need Redis"
+
+
+_L3_XML = """<plan level="L3">
+  <summary>Payment integration</summary>
+  <phases>
+    <phase name="Core">
+      <description>Payment processing</description>
+      <task index="0">
+        <title>Stripe client</title>
+        <description>Wrap Stripe API</description>
+        <task_type>code</task_type>
+        <complexity>complex</complexity>
+        <depends_on></depends_on>
+        <tools_needed>write_file</tools_needed>
+        <requirement_ids>R1</requirement_ids>
+        <verification_criteria>API calls succeed</verification_criteria>
+        <affected_files>src/stripe.py</affected_files>
+      </task>
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>Which payment provider?</ask>
+      <proposed>Stripe</proposed>
+      <impact>Different SDK</impact>
+    </question>
+  </questions>
+  <risks>
+    <risk>
+      <description>Stripe rate limits during peak</description>
+      <likelihood>low</likelihood>
+      <impact>high</impact>
+      <mitigation>Implement retry with exponential backoff</mitigation>
+    </risk>
+  </risks>
+  <test_strategy>
+    <approach>Mock Stripe API in tests</approach>
+    <test_tasks>Stripe client,Payment flow</test_tasks>
+    <coverage_notes>Cover refund edge cases</coverage_notes>
+  </test_strategy>
+</plan>"""
+
+
+def test_parse_l3_risks():
+    result = parse_plan_xml(_L3_XML)
+    assert len(result["risk_assessment"]) == 1
+    r = result["risk_assessment"][0]
+    assert r["risk"] == "Stripe rate limits during peak"
+    assert r["likelihood"] == "low"
+    assert r["impact"] == "high"
+    assert "backoff" in r["mitigation"]
+
+
+def test_parse_l3_test_strategy():
+    result = parse_plan_xml(_L3_XML)
+    ts = result["test_strategy"]
+    assert ts["approach"] == "Mock Stripe API in tests"
+    assert ts["test_tasks"] == ["Stripe client", "Payment flow"]
+    assert "refund" in ts["coverage_notes"]
+
+
+_CSHARP_XML = """<plan level="csharp">
+  <summary>Implement user service</summary>
+  <phases>
+    <phase name="UserService">
+      <description>Core user operations</description>
+      <task index="0">
+        <title>UserService.GetUser</title>
+        <description>Fetch user by ID</description>
+        <task_type>csharp_method</task_type>
+        <complexity>medium</complexity>
+        <depends_on></depends_on>
+        <target_class>MyApp.Services.UserService</target_class>
+        <target_signature>public async Task&lt;User&gt; GetUser(Guid id)</target_signature>
+        <available_methods>Save(User u),Delete(Guid id)</available_methods>
+        <constructor_params>IDbContext db,ILogger logger</constructor_params>
+        <requirement_ids>R1</requirement_ids>
+        <verification_criteria>Returns user or throws</verification_criteria>
+        <affected_files>src/Services/UserService.cs</affected_files>
+      </task>
+    </phase>
+  </phases>
+  <questions>
+    <question>
+      <ask>Use nullable return or exception?</ask>
+      <proposed>Exception for not found</proposed>
+      <impact>Changes caller error handling</impact>
+    </question>
+  </questions>
+  <assembly_config>
+    <new_files>src/Services/UserService.cs</new_files>
+    <modified_files>src/DI/Container.cs</modified_files>
+  </assembly_config>
+</plan>"""
+
+
+def test_parse_csharp_plan():
+    result = parse_plan_xml(_CSHARP_XML)
+    task = result["phases"][0]["tasks"][0]
+    assert task["task_type"] == "csharp_method"
+    assert task["target_class"] == "MyApp.Services.UserService"
+    assert "Task<User>" in task["target_signature"]  # XML entity decoded
+    assert task["constructor_params"] == ["IDbContext db", "ILogger logger"]
+    assert task["available_methods"] == ["Save(User u)", "Delete(Guid id)"]
+
+
+def test_parse_csharp_assembly_config():
+    result = parse_plan_xml(_CSHARP_XML)
+    ac = result["assembly_config"]
+    assert ac["new_files"] == ["src/Services/UserService.cs"]
+    assert ac["modified_files"] == ["src/DI/Container.cs"]
+
+
+def test_parse_depends_on_multiple():
+    xml = """<plan level="L1"><summary>S</summary><tasks>
+    <task index="2"><title>T</title><description>D</description>
+    <depends_on>0,1</depends_on></task>
+    </tasks></plan>"""
+    result = parse_plan_xml(xml)
+    assert result["tasks"][0]["depends_on"] == [0, 1]
+
+
+def test_parse_empty_depends_on():
+    xml = """<plan level="L1"><summary>S</summary><tasks>
+    <task index="0"><title>T</title><description>D</description>
+    <depends_on></depends_on></task>
+    </tasks></plan>"""
+    result = parse_plan_xml(xml)
+    assert result["tasks"][0]["depends_on"] == []
+
+
+def test_parse_missing_optional_fields():
+    xml = """<plan level="L1"><summary>S</summary><tasks>
+    <task index="0"><title>T</title><description>D</description></task>
+    </tasks></plan>"""
+    result = parse_plan_xml(xml)
+    t = result["tasks"][0]
+    assert t["tools_needed"] == []
+    assert t["affected_files"] == []
+    assert t["depends_on"] == []
+    assert t["task_type"] == "code"
+    assert t["complexity"] == "medium"
+
+
+def test_parse_xml_entities():
+    """XML entities like &lt; and &amp; are decoded properly."""
+    xml = """<plan level="L1"><summary>S</summary><tasks>
+    <task index="0"><title>T</title>
+    <description>Use &lt;T&gt; and &amp; operator</description></task>
+    </tasks></plan>"""
+    result = parse_plan_xml(xml)
+    assert result["tasks"][0]["description"] == "Use <T> and & operator"
+
+
+def test_roundtrip_dict_shape():
+    """Key-shape check only: parsed L2 XML has the same keys the JSON planner produced."""
+    result = parse_plan_xml(_L2_XML)
+    # Top-level keys expected on an L2 plan dict
+    assert "summary" in result
+    assert "phases" in result
+    assert "open_questions" in result
+    # Keys expected on a phase (only the first phase is sampled)
+    phase = result["phases"][0]
+    assert "name" in phase
+    assert "description" in phase
+    assert "tasks" in phase
+    # Keys expected on a task (only the first task of that phase is sampled)
+    task = phase["tasks"][0]
+    for key in ("title", "description", "task_type", "complexity",
+                "depends_on", "tools_needed", "requirement_ids",
+                "verification_criteria", "affected_files"):
+        assert key in task, f"Missing key: {key}"

From 0a9b9dca099f2c78fa44cdd68e593e4e4c619cb8 Mon Sep 17 00:00:00 2001
From: JRussas <159085336+JMRussas@users.noreply.github.com>
Date: Fri, 6 Mar 2026 22:14:36 -0500
Subject: [PATCH 2/2] Update CLAUDE.md with XML plan format and test count

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index b5eb728..5bdae49 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -69,6 +69,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
 | `backend/services/git_service.py` | Stateless git operations via subprocess + asyncio.to_thread |
 | `backend/services/resource_monitor.py` | Health checks (Ollama, ComfyUI, Claude) |
 | `backend/services/progress.py` | SSE broadcast, event persistence |
+| `backend/utils/xml_utils.py` | XML plan extraction and parsing (extract_xml_plan, parse_plan_xml) |
 | `backend/tools/registry.py` | Injectable `ToolRegistry` class |
 | `backend/tools/` | Tool implementations (RAG, Ollama, ComfyUI, file) |
 | `frontend/` | React 19 + TypeScript + Vite UI (ErrorBoundary, 404 page) |
@@ -93,6 +94,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
 - **Auth**: JWT Bearer tokens for REST, API keys (`orch_` prefix) for MCP/external executors, short-lived SSE tokens for EventSource. First registered user becomes admin.
 - **Ownership**: projects have `owner_id`. Users see/modify only their own projects. Admins can access all.
 - **Budget**: every API call recorded in `usage_log`, checked against limits before execution. Budget endpoints are admin-only.
+- **Plans**: XML format (source of truth in `plan_xml` column). Dual-column: `plan_xml` + `plan_json` for backward compat. Decomposer/routes prefer XML with JSON fallback. Planner has JSON fallback if Claude returns JSON despite XML prompt.
 - **Models**: Ollama (free) for simple tasks, Haiku ($) for medium, Sonnet ($$) for complex
 - **Tools**: registered in `ToolRegistry` class, injected via DI container
 - **SSE**: short-lived token via `POST /api/events/{project_id}/token`, then stream via `GET /api/events/{project_id}?token=...`
@@ -108,7 +110,7 @@ docker run -p 5200:5200 -v ./config.json:/app/config.json orchestration
 - **Traceability**: requirements numbered [R1], [R2], mapped to tasks; coverage endpoint shows gaps
 - **External execution**: MCP server (`backend/mcp/server.py`) for Claude Code integration. Execution modes: auto (engine-only), hybrid (Ollama internal, Claude external), external (all external). Tasks claimed atomically via CAS, results submitted with cost tracking.
 - **Git integration**: optional per-project (`repo_path` nullable). `GitService` wraps subprocess via `asyncio.to_thread()`. Config in `git.*` section. Phase 1 (foundation) complete; execution wiring (Phase 2+) pending.
-- **Tests**: Backend: pytest-asyncio (auto mode), 731 tests. Frontend: vitest + @testing-library/react, 137 tests. Load tests: 7 (excluded from CI via `slow` marker)
+- **Tests**: Backend: pytest-asyncio (auto mode), 797 tests. Frontend: vitest + @testing-library/react, 137 tests. Load tests: 7 (excluded from CI via `slow` marker)
 
 ## Git Workflow