diff --git a/app/desktop/desktop_server.py b/app/desktop/desktop_server.py
index bbb6b3910..dd52ce35b 100644
--- a/app/desktop/desktop_server.py
+++ b/app/desktop/desktop_server.py
@@ -14,6 +14,7 @@
 from kiln_ai.utils.logging import setup_litellm_logging
 
 from app.desktop.log_config import log_config
+from app.desktop.studio_server.chat_api import connect_chat_api
 from app.desktop.studio_server.copilot_api import connect_copilot_api
 from app.desktop.studio_server.data_gen_api import connect_data_gen_api
 from app.desktop.studio_server.dev_tools import connect_dev_tools
@@ -71,6 +72,7 @@ def make_app(tk_root: tk.Tk | None = None):
     connect_import_api(app, tk_root=tk_root)
     connect_tool_servers_api(app)
     connect_prompt_optimization_job_api(app)
+    connect_chat_api(app)
     connect_copilot_api(app)
     connect_dev_tools(app)
 
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/__init__.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/__init__.py
new file mode 100644
index 000000000..2d7c0b23d
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/__init__.py
@@ -0,0 +1 @@
+"""Contains endpoint functions for accessing the API"""
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/handle_chat_v1_chat_post.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/handle_chat_v1_chat_post.py
new file mode 100644
index 000000000..e621ed42c
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/api/chat/handle_chat_v1_chat_post.py
@@ -0,0 +1,164 @@
+from http import HTTPStatus
+from typing import Any
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.chat_request import ChatRequest
+from ...models.http_validation_error import HTTPValidationError
+from ...types import Response
+
+
+def _get_kwargs(
+    *,
+    body: ChatRequest,
+) -> dict[str, Any]:
+    """Assemble the httpx request arguments for ``POST /v1/chat/``.
+
+    Args:
+        body: Chat request; its ``to_dict()`` output becomes the JSON payload.
+
+    Returns:
+        A dict holding the ``method``, ``url``, ``json`` and ``headers`` entries.
+    """
+    request_headers: dict[str, Any] = {"Content-Type": "application/json"}
+    return {
+        "method": "post",
+        "url": "/v1/chat/",
+        "json": body.to_dict(),
+        "headers": request_headers,
+    }
+
+
+def _parse_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> Any | HTTPValidationError | None:
+    """Decode the response body according to its status code.
+
+    Returns:
+        The decoded JSON body for a 200, an ``HTTPValidationError`` built from
+        the JSON body for a 422, and ``None`` for any other status.
+
+    Raises:
+        errors.UnexpectedStatus: For any other status when
+            ``client.raise_on_unexpected_status`` is truthy.
+    """
+    if response.status_code == 200:
+        return response.json()
+
+    if response.status_code == 422:
+        return HTTPValidationError.from_dict(response.json())
+
+    # Undocumented status: either stay quiet or surface it, as the client asks.
+    if not client.raise_on_unexpected_status:
+        return None
+    raise errors.UnexpectedStatus(response.status_code, response.content)
+
+
+def _build_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> Response[Any | HTTPValidationError]:
+    """Wrap an httpx response in the client's ``Response`` container.
+
+    The status code is converted to ``HTTPStatus`` before the body is parsed,
+    so the order in which either step can fail is unchanged.
+    """
+    status = HTTPStatus(response.status_code)
+    raw_content = response.content
+    raw_headers = response.headers
+    parsed_body = _parse_response(client=client, response=response)
+    return Response(
+        status_code=status,
+        content=raw_content,
+        headers=raw_headers,
+        parsed=parsed_body,
+    )
+
+
+def sync_detailed(
+    *,
+    client: AuthenticatedClient,
+    body: ChatRequest,
+) -> Response[Any | HTTPValidationError]:
+    """Handle Chat (blocking call, full response).
+
+    Args:
+        body (ChatRequest): Request payload sent as JSON.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Any | HTTPValidationError]
+    """
+
+    request_kwargs = _get_kwargs(body=body)
+    http_response = client.get_httpx_client().request(**request_kwargs)
+    return _build_response(client=client, response=http_response)
+
+
+def sync(
+    *,
+    client: AuthenticatedClient,
+    body: ChatRequest,
+) -> Any | HTTPValidationError | None:
+    """Handle Chat (blocking call, parsed body only).
+
+    Args:
+        body (ChatRequest): Request payload sent as JSON.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Any | HTTPValidationError
+    """
+
+    detailed = sync_detailed(client=client, body=body)
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    *,
+    client: AuthenticatedClient,
+    body: ChatRequest,
+) -> Response[Any | HTTPValidationError]:
+    """Handle Chat (awaitable call, full response).
+
+    Args:
+        body (ChatRequest): Request payload sent as JSON.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Any | HTTPValidationError]
+    """
+
+    request_kwargs = _get_kwargs(body=body)
+    http_response = await client.get_async_httpx_client().request(**request_kwargs)
+    return _build_response(client=client, response=http_response)
+
+
+async def asyncio(
+    *,
+    client: AuthenticatedClient,
+    body: ChatRequest,
+) -> Any | HTTPValidationError | None:
+    """Handle Chat (awaitable call, parsed body only).
+
+    Args:
+        body (ChatRequest): Request payload sent as JSON.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Any | HTTPValidationError
+    """
+
+    detailed = await asyncio_detailed(client=client, body=body)
+    return detailed.parsed
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/__init__.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/__init__.py
index d793ae480..5bd7ed2e2 100644
--- a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/__init__.py
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/__init__.py
@@ -7,12 +7,15 @@
     BodyStartPromptOptimizationJobV1JobsPromptOptimizationJobStartPost,
 )
 from .body_start_sample_job_v1_jobs_sample_job_start_post import BodyStartSampleJobV1JobsSampleJobStartPost
+from .chat_request import ChatRequest
 from .check_entitlements_v1_check_entitlements_get_response_check_entitlements_v1_check_entitlements_get import (
     CheckEntitlementsV1CheckEntitlementsGetResponseCheckEntitlementsV1CheckEntitlementsGet,
 )
 from .check_model_supported_response import CheckModelSupportedResponse
 from .clarify_spec_input import ClarifySpecInput
 from .clarify_spec_output import ClarifySpecOutput
+from .client_message import ClientMessage
+from .client_message_part import ClientMessagePart
 from .examples_for_feedback_item import ExamplesForFeedbackItem
 from .examples_with_feedback_item import ExamplesWithFeedbackItem
 from .generate_batch_input import GenerateBatchInput
@@ -51,6 +54,8 @@
 from .synthetic_data_generation_step_config_input import SyntheticDataGenerationStepConfigInput
 from .task_info import TaskInfo
 from .task_metadata import TaskMetadata
+from .tool_invocation import ToolInvocation
+from .tool_invocation_state import ToolInvocationState
 from .validation_error import ValidationError
 
 __all__ = (
@@ -59,10 +64,13 @@
     "ApiKeyVerificationResult",
     "BodyStartPromptOptimizationJobV1JobsPromptOptimizationJobStartPost",
     "BodyStartSampleJobV1JobsSampleJobStartPost",
+    "ChatRequest",
     "CheckEntitlementsV1CheckEntitlementsGetResponseCheckEntitlementsV1CheckEntitlementsGet",
     "CheckModelSupportedResponse",
     "ClarifySpecInput",
     "ClarifySpecOutput",
+    "ClientMessage",
+    "ClientMessagePart",
     "ExamplesForFeedbackItem",
     "ExamplesWithFeedbackItem",
     "GenerateBatchInput",
@@ -101,5 +109,7 @@
     "SyntheticDataGenerationStepConfigInput",
     "TaskInfo",
     "TaskMetadata",
+    "ToolInvocation",
+    "ToolInvocationState",
     "ValidationError",
 )
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/chat_request.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/chat_request.py
new file mode 100644
index 000000000..aaab8fe61
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/chat_request.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any, TypeVar, cast
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+from ..types import UNSET, Unset
+
+if TYPE_CHECKING:
+    from ..models.client_message import ClientMessage
+
+
+T = TypeVar("T", bound="ChatRequest")
+
+
+@_attrs_define
+class ChatRequest:
+    """Request body model for the chat endpoint.
+
+    Attributes:
+        messages (list[ClientMessage]): Serialized under the ``messages`` key.
+        task_id (None | str | Unset): Serialized under ``task_id``; omitted
+            from the output while it is ``Unset``.
+    """
+
+    messages: list[ClientMessage]
+    task_id: None | str | Unset = UNSET
+    # Keys found in a source dict that do not map onto a declared attribute.
+    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dict; declared fields override additional properties."""
+        serialized_messages = [message.to_dict() for message in self.messages]
+        task_id = UNSET if isinstance(self.task_id, Unset) else self.task_id
+
+        field_dict: dict[str, Any] = {}
+        field_dict.update(self.additional_properties)
+        field_dict["messages"] = serialized_messages
+        if task_id is not UNSET:
+            field_dict["task_id"] = task_id
+
+        return field_dict
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping; ``messages`` is a required key."""
+        from ..models.client_message import ClientMessage
+
+        remaining = dict(src_dict)
+        messages = [ClientMessage.from_dict(item) for item in remaining.pop("messages")]
+
+        # The stored value is kept as-is: absent -> UNSET, null -> None.
+        task_id = cast(None | str | Unset, remaining.pop("task_id", UNSET))
+
+        chat_request = cls(messages=messages, task_id=task_id)
+        # Whatever was not consumed above is preserved as additional properties.
+        chat_request.additional_properties = remaining
+        return chat_request
+
+    @property
+    def additional_keys(self) -> list[str]:
+        """Names of the additional (undeclared) properties."""
+        return list(self.additional_properties.keys())
+
+    def __getitem__(self, key: str) -> Any:
+        """Read an additional property."""
+        return self.additional_properties[key]
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        """Write an additional property."""
+        self.additional_properties[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        """Remove an additional property."""
+        del self.additional_properties[key]
+
+    def __contains__(self, key: str) -> bool:
+        """Report whether an additional property with this name exists."""
+        return key in self.additional_properties
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message.py
new file mode 100644
index 000000000..ba9847cf1
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message.py
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any, TypeVar, cast
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+from ..types import UNSET, Unset
+
+if TYPE_CHECKING:
+    from ..models.client_message_part import ClientMessagePart
+    from ..models.tool_invocation import ToolInvocation
+
+
+T = TypeVar("T", bound="ClientMessage")
+
+
+@_attrs_define
+class ClientMessage:
+    """Chat message model.
+
+    Attributes:
+        role (str): Serialized under ``role``.
+        content (None | str | Unset): Serialized under ``content``.
+        parts (list[ClientMessagePart] | None | Unset): Serialized under ``parts``.
+        tool_invocations (list[ToolInvocation] | None | Unset): Serialized
+            under the camelCase key ``toolInvocations``.
+    """
+
+    role: str
+    content: None | str | Unset = UNSET
+    parts: list[ClientMessagePart] | None | Unset = UNSET
+    tool_invocations: list[ToolInvocation] | None | Unset = UNSET
+    # Keys found in a source dict that do not map onto a declared attribute.
+    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dict, leaving out every field that is ``Unset``."""
+
+        def _dump_optional_list(value: Any) -> Any:
+            # Unset collapses to the sentinel, lists are dumped item by item,
+            # and anything else (i.e. None) is passed through untouched.
+            if isinstance(value, Unset):
+                return UNSET
+            if isinstance(value, list):
+                return [item.to_dict() for item in value]
+            return value
+
+        role = self.role
+        content = UNSET if isinstance(self.content, Unset) else self.content
+        parts = _dump_optional_list(self.parts)
+        tool_invocations = _dump_optional_list(self.tool_invocations)
+
+        field_dict: dict[str, Any] = {}
+        field_dict.update(self.additional_properties)
+        field_dict["role"] = role
+        optional_fields = (
+            ("content", content),
+            ("parts", parts),
+            ("toolInvocations", tool_invocations),
+        )
+        for key, value in optional_fields:
+            if value is not UNSET:
+                field_dict[key] = value
+
+        return field_dict
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping; ``role`` is a required key."""
+        from ..models.client_message_part import ClientMessagePart
+        from ..models.tool_invocation import ToolInvocation
+
+        def _load_optional_list(data: object, item_cls: Any) -> Any:
+            # None and Unset are returned unchanged. A list is converted item by
+            # item; if that conversion fails with one of the listed errors, or
+            # the value is not a list at all, the raw value is returned instead.
+            if data is None or isinstance(data, Unset):
+                return data
+            if isinstance(data, list):
+                try:
+                    return [item_cls.from_dict(item) for item in data]
+                except (TypeError, ValueError, AttributeError, KeyError):
+                    pass
+            return data
+
+        remaining = dict(src_dict)
+        role = remaining.pop("role")
+
+        # The stored value is kept as-is: absent -> UNSET, null -> None.
+        content = cast(None | str | Unset, remaining.pop("content", UNSET))
+        parts = _load_optional_list(remaining.pop("parts", UNSET), ClientMessagePart)
+        tool_invocations = _load_optional_list(
+            remaining.pop("toolInvocations", UNSET), ToolInvocation
+        )
+
+        client_message = cls(
+            role=role,
+            content=content,
+            parts=parts,
+            tool_invocations=tool_invocations,
+        )
+        # Whatever was not consumed above is preserved as additional properties.
+        client_message.additional_properties = remaining
+        return client_message
+
+    @property
+    def additional_keys(self) -> list[str]:
+        """Names of the additional (undeclared) properties."""
+        return list(self.additional_properties.keys())
+
+    def __getitem__(self, key: str) -> Any:
+        """Read an additional property."""
+        return self.additional_properties[key]
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        """Write an additional property."""
+        self.additional_properties[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        """Remove an additional property."""
+        del self.additional_properties[key]
+
+    def __contains__(self, key: str) -> bool:
+        """Report whether an additional property with this name exists."""
+        return key in self.additional_properties
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message_part.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message_part.py
new file mode 100644
index 000000000..67880d9cd
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/client_message_part.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, TypeVar, cast
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+from ..types import UNSET, Unset
+
+T = TypeVar("T", bound="ClientMessagePart")
+
+
+@_attrs_define
+class ClientMessagePart:
+    """One part of a chat message.
+
+    Attributes:
+        type_ (str): Serialized under ``type``.
+        text (None | str | Unset):
+        content_type (None | str | Unset): Serialized under ``contentType``.
+        url (None | str | Unset):
+        data (Any | Unset):
+        tool_call_id (None | str | Unset): Serialized under ``toolCallId``.
+        tool_name (None | str | Unset): Serialized under ``toolName``.
+        state (None | str | Unset):
+        input_ (Any | Unset): Serialized under ``input``.
+        output (Any | Unset):
+        args (Any | Unset):
+    """
+
+    type_: str
+    text: None | str | Unset = UNSET
+    content_type: None | str | Unset = UNSET
+    url: None | str | Unset = UNSET
+    data: Any | Unset = UNSET
+    tool_call_id: None | str | Unset = UNSET
+    tool_name: None | str | Unset = UNSET
+    state: None | str | Unset = UNSET
+    input_: Any | Unset = UNSET
+    output: Any | Unset = UNSET
+    args: Any | Unset = UNSET
+    # Keys found in a source dict that do not map onto a declared attribute.
+    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dict, leaving out every field that is unset."""
+
+        def _or_unset(value: Any) -> Any:
+            # Any Unset instance is collapsed to the UNSET sentinel so the
+            # identity test below skips it.
+            return UNSET if isinstance(value, Unset) else value
+
+        # (wire key, value) pairs in output order. The Any-typed fields are
+        # compared against the sentinel directly, without normalization.
+        optional_fields = (
+            ("text", _or_unset(self.text)),
+            ("contentType", _or_unset(self.content_type)),
+            ("url", _or_unset(self.url)),
+            ("data", self.data),
+            ("toolCallId", _or_unset(self.tool_call_id)),
+            ("toolName", _or_unset(self.tool_name)),
+            ("state", _or_unset(self.state)),
+            ("input", self.input_),
+            ("output", self.output),
+            ("args", self.args),
+        )
+
+        field_dict: dict[str, Any] = {}
+        field_dict.update(self.additional_properties)
+        field_dict["type"] = self.type_
+        for key, value in optional_fields:
+            if value is not UNSET:
+                field_dict[key] = value
+
+        return field_dict
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping; ``type`` is a required key."""
+        remaining = dict(src_dict)
+        type_ = remaining.pop("type")
+
+        def _pop_optional_str(key: str) -> None | str | Unset:
+            # The stored value is returned untouched: absent -> UNSET,
+            # null -> None, anything else as found.
+            return cast(None | str | Unset, remaining.pop(key, UNSET))
+
+        text = _pop_optional_str("text")
+        content_type = _pop_optional_str("contentType")
+        url = _pop_optional_str("url")
+        data = remaining.pop("data", UNSET)
+        tool_call_id = _pop_optional_str("toolCallId")
+        tool_name = _pop_optional_str("toolName")
+        state = _pop_optional_str("state")
+        input_ = remaining.pop("input", UNSET)
+        output = remaining.pop("output", UNSET)
+        args = remaining.pop("args", UNSET)
+
+        client_message_part = cls(
+            type_=type_,
+            text=text,
+            content_type=content_type,
+            url=url,
+            data=data,
+            tool_call_id=tool_call_id,
+            tool_name=tool_name,
+            state=state,
+            input_=input_,
+            output=output,
+            args=args,
+        )
+        # Whatever was not consumed above is preserved as additional properties.
+        client_message_part.additional_properties = remaining
+        return client_message_part
+
+    @property
+    def additional_keys(self) -> list[str]:
+        """Names of the additional (undeclared) properties."""
+        return list(self.additional_properties.keys())
+
+    def __getitem__(self, key: str) -> Any:
+        """Read an additional property."""
+        return self.additional_properties[key]
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        """Write an additional property."""
+        self.additional_properties[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        """Remove an additional property."""
+        del self.additional_properties[key]
+
+    def __contains__(self, key: str) -> bool:
+        """Report whether an additional property with this name exists."""
+        return key in self.additional_properties
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation.py
new file mode 100644
index 000000000..775865148
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, TypeVar
+
+from attrs import define as _attrs_define
+from attrs import field as _attrs_field
+
+from ..models.tool_invocation_state import ToolInvocationState
+
+T = TypeVar("T", bound="ToolInvocation")
+
+
+@_attrs_define
+class ToolInvocation:
+    """Tool invocation model; all five declared fields are required.
+
+    Attributes:
+        state (ToolInvocationState): Serialized as the enum's ``value``.
+        tool_call_id (str): Serialized under ``toolCallId``.
+        tool_name (str): Serialized under ``toolName``.
+        args (Any):
+        result (Any):
+    """
+
+    state: ToolInvocationState
+    tool_call_id: str
+    tool_name: str
+    args: Any
+    result: Any
+    # Keys found in a source dict that do not map onto a declared attribute.
+    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dict; declared fields override additional properties."""
+        declared_fields: dict[str, Any] = {
+            "state": self.state.value,
+            "toolCallId": self.tool_call_id,
+            "toolName": self.tool_name,
+            "args": self.args,
+            "result": self.result,
+        }
+
+        field_dict: dict[str, Any] = {}
+        field_dict.update(self.additional_properties)
+        field_dict.update(declared_fields)
+        return field_dict
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping that contains every declared key."""
+        remaining = dict(src_dict)
+
+        # Keyword arguments are evaluated left to right, so keys are consumed
+        # in the order state, toolCallId, toolName, args, result.
+        tool_invocation = cls(
+            state=ToolInvocationState(remaining.pop("state")),
+            tool_call_id=remaining.pop("toolCallId"),
+            tool_name=remaining.pop("toolName"),
+            args=remaining.pop("args"),
+            result=remaining.pop("result"),
+        )
+        # Whatever was not consumed above is preserved as additional properties.
+        tool_invocation.additional_properties = remaining
+        return tool_invocation
+
+    @property
+    def additional_keys(self) -> list[str]:
+        """Names of the additional (undeclared) properties."""
+        return list(self.additional_properties.keys())
+
+    def __getitem__(self, key: str) -> Any:
+        """Read an additional property."""
+        return self.additional_properties[key]
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        """Write an additional property."""
+        self.additional_properties[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        """Remove an additional property."""
+        del self.additional_properties[key]
+
+    def __contains__(self, key: str) -> bool:
+        """Report whether an additional property with this name exists."""
+        return key in self.additional_properties
diff --git a/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation_state.py b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation_state.py
new file mode 100644
index 000000000..eed1ef25c
--- /dev/null
+++ b/app/desktop/studio_server/api_client/kiln_ai_server_client/models/tool_invocation_state.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class ToolInvocationState(str, Enum):
+    """String-valued enum of tool invocation states."""
+
+    CALL = "call"
+    PARTIAL_CALL = "partial-call"
+    RESULT = "result"
+
+    def __str__(self) -> str:
+        # Render as the raw wire value rather than "ToolInvocationState.X".
+        return f"{self.value}"
diff --git a/app/desktop/studio_server/chat_api.py b/app/desktop/studio_server/chat_api.py
new file mode 100644
index 000000000..1457171b5
--- /dev/null
+++ b/app/desktop/studio_server/chat_api.py
@@ -0,0 +1,209 @@
+import json
+import logging
+from typing import Any
+
+import httpx
+from app.desktop.studio_server.api_client.kiln_server_client import (
+    _get_base_url,
+    _get_common_headers,
+)
+from app.desktop.studio_server.utils.copilot_utils import get_copilot_api_key
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from kiln_ai.datamodel import Project, TaskRun
+from kiln_ai.utils.config import Config
+
+logger = logging.getLogger(__name__)
+
+_CHAT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=30.0)
+_MAX_CLIENT_TOOL_ROUNDS = 5
+
+
+def _build_upstream_headers(api_key: str) -> dict[str, str]:
+    """Return the common client headers plus bearer auth and a JSON content type.
+
+    ``Authorization`` and ``Content-Type`` override any same-named entries
+    returned by ``_get_common_headers()``.
+    """
+    upstream_headers = dict(_get_common_headers())
+    upstream_headers["Authorization"] = f"Bearer {api_key}"
+    upstream_headers["Content-Type"] = "application/json"
+    return upstream_headers
+
+
+def _find_task_run_by_id(task_run_id: str) -> TaskRun | None:
+    """Search all projects and tasks for a task run with the given ID.
+
+    Projects that fail to load are skipped so that one unreadable project
+    does not stop the remaining projects from being searched; the failure is
+    logged rather than silently dropped.
+
+    Args:
+        task_run_id: ID of the task run to look up.
+
+    Returns:
+        The first matching ``TaskRun``, or ``None`` if no task contains it.
+    """
+    for project_path in Config.shared().projects or []:
+        try:
+            project = Project.load_from_file(project_path)
+        except Exception:
+            # Best-effort lookup: keep going, but leave a trace for debugging.
+            logger.warning(
+                "Skipping project that failed to load: %s",
+                project_path,
+                exc_info=True,
+            )
+            continue
+        for task in project.tasks():
+            run = TaskRun.from_id_and_parent_path(task_run_id, task.path)
+            if run is not None:
+                return run
+    return None
+
+
+def _execute_client_tool(tool_name: str, arguments: dict[str, Any]) -> str:
+    """Execute a client-side tool and return the result as a string.
+
+    Failures are reported as a JSON object with an ``error`` key instead of
+    being raised, so the caller always gets a string back.
+
+    Args:
+        tool_name: Name of the tool to run; only ``read_task_run`` is known.
+        arguments: Tool arguments. ``read_task_run`` reads ``task_run_id``.
+
+    Returns:
+        The task run dumped as indented JSON, or a JSON error object.
+    """
+    if tool_name != "read_task_run":
+        return json.dumps({"error": f"Unknown client tool: {tool_name}"})
+
+    # The arguments originate from a decoded upstream event, so they are not
+    # guaranteed to be a JSON object; reject anything else instead of letting
+    # ``.get`` raise AttributeError.
+    if not isinstance(arguments, dict):
+        return json.dumps({"error": "Tool arguments must be a JSON object"})
+
+    task_run_id = arguments.get("task_run_id", "")
+    if not task_run_id:
+        return json.dumps({"error": "task_run_id is required"})
+    try:
+        run = _find_task_run_by_id(task_run_id)
+        if run is None:
+            return json.dumps({"error": f"Task run not found: {task_run_id}"})
+        return run.model_dump_json(indent=2)
+    except Exception as e:
+        return json.dumps({"error": f"Failed to read task run: {e}"})
+
+
+def _parse_sse_events(
+    raw: bytes,
+) -> tuple[list[bytes], dict[str, Any] | None]:
+    """Parse raw SSE bytes into passthrough lines and an optional client-tool-call event.
+
+    A ``data:`` line whose JSON payload is an object with
+    ``"type": "client-tool-call"`` is withheld from the forwarded lines. Every
+    other line passes through unchanged, including blank separators,
+    ``[DONE]`` and payloads that cannot be decoded as JSON.
+
+    Args:
+        raw: Bytes of the SSE stream; split on ``b"\\n"``.
+
+    Returns:
+        ``(lines_to_forward, client_tool_event_or_none)``. When ``raw`` holds
+        several client-tool-call events, the last one is returned.
+    """
+    lines_to_forward: list[bytes] = []
+    client_tool_event: dict[str, Any] | None = None
+    prefix = b"data:"
+
+    for line in raw.split(b"\n"):
+        # The space after "data:" is optional in the SSE format, so match on
+        # the bare field name and let strip() remove any leading space.
+        if line.startswith(prefix):
+            payload = line[len(prefix) :].strip()
+            if payload and payload != b"[DONE]":
+                try:
+                    event = json.loads(payload)
+                except (ValueError, TypeError):
+                    # ValueError covers json.JSONDecodeError and also the
+                    # UnicodeDecodeError raised when the bytes end in the
+                    # middle of a multi-byte character; forward such lines.
+                    event = None
+                if isinstance(event, dict) and event.get("type") == "client-tool-call":
+                    client_tool_event = event
+                    continue
+        lines_to_forward.append(line)
+
+    return lines_to_forward, client_tool_event
+
+
+def connect_chat_api(app: FastAPI) -> None:
+    """Register POST /api/chat: an SSE proxy that executes client tools locally."""
+
+    @app.post("/api/chat")
+    async def chat(request: Request) -> StreamingResponse:
+        # Proxies the JSON request body upstream and streams the SSE reply back.
+        api_key = get_copilot_api_key()
+        body_bytes = await request.body()
+        body_json = json.loads(body_bytes)
+
+        upstream_url = f"{_get_base_url()}/v1/chat/"
+        headers = _build_upstream_headers(api_key)
+
+        async def stream_with_client_tools():
+            """Yield SSE bytes, re-POSTing with the tool result after a tool call."""
+            current_body = body_json
+            for _ in range(_MAX_CLIENT_TOOL_ROUNDS):
+                client_tool_event = None
+                # Network chunks can end mid-line, so only complete lines are parsed
+                # and forwarded; `tail` carries the unterminated rest to the next chunk.
+                tail = b""
+                async with httpx.AsyncClient(timeout=_CHAT_TIMEOUT) as client:
+                    async with client.stream(
+                        "POST",
+                        upstream_url,
+                        content=json.dumps(current_body).encode(),
+                        headers=headers,
+                    ) as upstream:
+                        if upstream.status_code != 200:
+                            error_body = await upstream.aread()
+                            detail = "Chat request failed."
+                            if error_body.startswith(b"{"):
+                                try:
+                                    message = json.loads(error_body).get("message")
+                                    detail = message or detail
+                                except ValueError:
+                                    pass
+                            yield f"data: {json.dumps({'type': 'error', 'message': detail})}\n\n".encode()
+                            return
+
+                        try:
+                            async for chunk in upstream.aiter_bytes():
+                                head, sep, tail = (tail + chunk).rpartition(b"\n")
+                                lines, tool_event = _parse_sse_events(head)
+                                if tool_event:
+                                    client_tool_event = tool_event
+                                if sep and lines:
+                                    yield b"\n".join(lines) + sep
+                        except httpx.RemoteProtocolError:
+                            if client_tool_event is None:
+                                raise
+                            logger.debug(
+                                "Connection closed after client tool call event (expected)"
+                            )
+
+                if tail:  # upstream ended without a final newline: flush the last line
+                    lines, tool_event = _parse_sse_events(tail)
+                    client_tool_event = tool_event or client_tool_event
+                    if lines:
+                        yield b"\n".join(lines) + b"\n"
+                if client_tool_event is None:
+                    return
+
+                tool_name = client_tool_event.get("toolName", "")
+                tool_call_id = client_tool_event.get("toolCallId", "")
+                tool_input = client_tool_event.get("input", {})
+                logger.info(
+                    f"Executing client tool: {tool_name} (call_id={tool_call_id})"
+                )
+                yield f"data: {json.dumps({'type': 'tool-output-available', 'toolCallId': tool_call_id, 'output': '(executing locally...)'})}\n\n".encode()
+                tool_result = _execute_client_tool(tool_name, tool_input)
+                yield f"data: {json.dumps({'type': 'tool-output-available', 'toolCallId': tool_call_id, 'output': tool_result})}\n\n".encode()
+                current_body = _build_continuation_body(
+                    current_body, tool_call_id, tool_name, tool_input, tool_result
+                )
+
+        return StreamingResponse(
+            content=stream_with_client_tools(),
+            media_type="text/event-stream",
+        )
+
+
+def _build_continuation_body(
+    original_body: dict[str, Any],
+    tool_call_id: str,
+    tool_name: str,
+    tool_input: Any,
+    tool_result: str,
+) -> dict[str, Any]:
+    """Return the request body to send after a client tool call has run.
+
+    The result is a shallow copy of ``original_body`` whose ``messages`` list
+    ends with one extra assistant message. That message carries two parts for
+    the same tool call: the invocation (state ``call``, with ``input``) and its
+    result (state ``output-available``, with ``output``). Keeping both in one
+    assistant message is meant to let the backend's convert_to_openai_messages
+    produce the assistant(tool_calls) + tool(result) sequence.
+
+    ``original_body`` and its ``messages`` list are not mutated.
+    """
+    part_type = f"tool-{tool_name}"
+
+    call_part = {
+        "type": part_type,
+        "toolCallId": tool_call_id,
+        "toolName": tool_name,
+        "input": tool_input,
+        "state": "call",
+    }
+    result_part = {
+        "type": part_type,
+        "toolCallId": tool_call_id,
+        "toolName": tool_name,
+        "output": tool_result,
+        "state": "output-available",
+    }
+    assistant_message = {"role": "assistant", "parts": [call_part, result_part]}
+
+    prior_messages = original_body.get("messages", [])
+    return {**original_body, "messages": [*prior_messages, assistant_message]}
diff --git a/app/desktop/studio_server/test_chat_api.py b/app/desktop/studio_server/test_chat_api.py
new file mode 100644
index 000000000..3be97003c
--- /dev/null
+++ b/app/desktop/studio_server/test_chat_api.py
@@ -0,0 +1,283 @@
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from app.desktop.studio_server.chat_api import (
+    _build_continuation_body,
+    _execute_client_tool,
+    _parse_sse_events,
+    connect_chat_api,
+)
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from kiln_server.custom_errors import connect_custom_errors
+
+
+@pytest.fixture
+def app():  # bare FastAPI app with only custom errors and the chat API connected
+    app = FastAPI()
+    connect_custom_errors(app)
+    connect_chat_api(app)
+    return app
+
+
+@pytest.fixture
+def client(app):  # TestClient bound to the `app` fixture
+    return TestClient(app)
+
+
+@pytest.fixture
+def mock_api_key():  # patches Config.shared so the copilot key is "test_api_key"
+    with patch(
+        "app.desktop.studio_server.utils.copilot_utils.Config.shared"
+    ) as mock_config_shared:
+        mock_config = mock_config_shared.return_value
+        mock_config.kiln_copilot_api_key = "test_api_key"
+        yield mock_config
+
+
+def _make_httpx_mock(status_code: int = 200, chunks: list[bytes] | None = None):
+    if chunks is None:  # default: a single text-delta event
+        chunks = [b'data: {"type":"text-delta","delta":"hello"}\n\n']
+
+    async def mock_aiter_bytes():
+        for chunk in chunks:
+            yield chunk
+
+    mock_upstream = MagicMock()  # object yielded by "async with client.stream(...)"
+    mock_upstream.status_code = status_code
+    mock_upstream.aiter_bytes.return_value = mock_aiter_bytes()  # one-shot iterator
+    mock_upstream.aread = AsyncMock(  # JSON error body only for non-200 statuses
+        return_value=b'{"message":"upstream error"}' if status_code != 200 else b""
+    )
+    mock_upstream.__aenter__ = AsyncMock(return_value=mock_upstream)
+    mock_upstream.__aexit__ = AsyncMock(return_value=None)
+
+    mock_client = MagicMock()  # object yielded by "async with AsyncClient(...)"
+    mock_client.stream.return_value = mock_upstream
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    mock_async_client_class = MagicMock(return_value=mock_client)
+    return mock_async_client_class, mock_client, mock_upstream
+
+
+# --- SSE passthrough tests ---
+
+
+class TestChatStreaming:  # endpoint-level tests with httpx.AsyncClient mocked out
+    def test_streams_chunks(self, client, mock_api_key):
+        chunks = [
+            b'data: {"type":"text-delta","delta":"hello"}\n\n',
+            b'data: {"type":"finish"}\n\n',
+        ]
+        mock_class, _, _ = _make_httpx_mock(chunks=chunks)
+
+        with patch("app.desktop.studio_server.chat_api.httpx.AsyncClient", mock_class):
+            response = client.post(
+                "/api/chat",
+                json={"messages": [{"role": "user", "content": "hi"}]},
+            )
+
+        assert response.status_code == 200
+        assert response.headers["content-type"].startswith("text/event-stream")
+        assert b"text-delta" in response.content  # upstream event was passed through
+
+    def test_forwards_auth_header(self, client, mock_api_key):
+        mock_class, mock_client, _ = _make_httpx_mock()
+
+        with patch("app.desktop.studio_server.chat_api.httpx.AsyncClient", mock_class):
+            client.post(
+                "/api/chat",
+                json={"messages": [{"role": "user", "content": "hi"}]},
+            )
+
+        call_kwargs = mock_client.stream.call_args  # the single upstream request
+        headers = call_kwargs.kwargs.get("headers", {})
+        assert headers.get("Authorization") == "Bearer test_api_key"
+
+    def test_returns_401_when_no_api_key(self, client):
+        with patch(
+            "app.desktop.studio_server.utils.copilot_utils.Config.shared"
+        ) as mock_config_shared:
+            mock_config = mock_config_shared.return_value
+            mock_config.kiln_copilot_api_key = None  # simulate a missing API key
+
+            response = client.post(
+                "/api/chat",
+                json={"messages": [{"role": "user", "content": "hi"}]},
+            )
+
+        assert response.status_code == 401
+
+    def test_handles_upstream_error(self, client, mock_api_key):
+        mock_class, _, _ = _make_httpx_mock(status_code=500)
+
+        with patch("app.desktop.studio_server.chat_api.httpx.AsyncClient", mock_class):
+            response = client.post(
+                "/api/chat",
+                json={"messages": [{"role": "user", "content": "hi"}]},
+            )
+
+        assert response.status_code == 200  # upstream failure is reported in-band
+        assert b"error" in response.content
+
+
+# --- SSE parsing tests ---
+
+
+class TestParseSSEEvents:  # unit tests for _parse_sse_events on raw byte input
+    def test_passthrough_normal_events(self):
+        raw = b'data: {"type":"text-delta","delta":"hi"}\n\n'
+        lines, tool_event = _parse_sse_events(raw)
+        assert tool_event is None
+        assert any(b"text-delta" in line for line in lines)
+
+    def test_detects_client_tool_call(self):
+        raw = (
+            b'data: {"type":"text-delta","delta":"hi"}\n'
+            b'data: {"type":"client-tool-call","toolCallId":"tc1","toolName":"read_task_run","input":{"path":"/x"}}\n\n'
+        )
+        lines, tool_event = _parse_sse_events(raw)
+        assert tool_event is not None
+        assert tool_event["toolName"] == "read_task_run"
+        assert tool_event["toolCallId"] == "tc1"
+        assert not any(b"client-tool-call" in line for line in lines)  # withheld
+
+    def test_handles_empty_input(self):  # only the event is checked, not `lines`
+        lines, tool_event = _parse_sse_events(b"")
+        assert tool_event is None
+
+
+# --- Client tool execution tests ---
+
+
+class TestExecuteClientTool:  # _execute_client_tool with the run lookup patched
+    def test_read_task_run_success(self):
+        mock_run = MagicMock()
+        mock_run.model_dump_json.return_value = '{"id": "42", "input": "hello"}'
+
+        with patch(
+            "app.desktop.studio_server.chat_api._find_task_run_by_id",
+            return_value=mock_run,
+        ):
+            result = _execute_client_tool("read_task_run", {"task_run_id": "42"})
+        assert '"id": "42"' in result
+
+    def test_read_task_run_not_found(self):
+        with patch(
+            "app.desktop.studio_server.chat_api._find_task_run_by_id",
+            return_value=None,
+        ):
+            result = _execute_client_tool("read_task_run", {"task_run_id": "999"})
+        parsed = json.loads(result)  # failures come back as a JSON string
+        assert "error" in parsed
+        assert "999" in parsed["error"]
+
+    def test_read_task_run_missing_id(self):
+        result = _execute_client_tool("read_task_run", {})  # no task_run_id given
+        parsed = json.loads(result)
+        assert "error" in parsed
+
+    def test_unknown_tool(self):
+        result = _execute_client_tool("unknown_tool", {})
+        assert "Unknown client tool" in result
+
+
+# --- Continuation body tests ---
+
+
+class TestBuildContinuationBody:  # shape of the body sent after a client tool call
+    def test_appends_tool_messages(self):
+        original = {"messages": [{"role": "user", "content": "hi"}]}
+        result = _build_continuation_body(
+            original, "tc1", "read_task_run", {"path": "/x"}, '{"data": "result"}'
+        )
+
+        assert len(result["messages"]) == 2  # original message + one assistant message
+        assert result["messages"][0]["role"] == "user"
+
+        parts = result["messages"][1]["parts"]
+        assert result["messages"][1]["role"] == "assistant"
+        assert len(parts) == 2  # call part followed by result part
+        assert parts[0]["toolCallId"] == "tc1"
+        assert parts[0]["state"] == "call"
+        assert parts[0]["input"] == {"path": "/x"}
+        assert parts[1]["state"] == "output-available"
+        assert parts[1]["output"] == '{"data": "result"}'
+        assert "input" not in parts[1]
+
+    def test_preserves_original_body_fields(self):
+        original = {
+            "messages": [{"role": "user", "content": "hi"}],
+            "task_id": "test_task",
+        }
+        result = _build_continuation_body(original, "tc1", "tool", {}, "result")
+        assert result["task_id"] == "test_task"
+
+
+# --- Client tool round-trip test ---
+
+
+class TestClientToolRoundTrip:  # full proxy loop: tool call, local run, continuation
+    def test_detects_and_continues_after_client_tool(self, client, mock_api_key):
+        """First request returns client-tool-call, proxy executes locally and sends continuation."""
+        first_response_chunks = [
+            b'data: {"type":"text-delta","delta":"Let me read that"}\n\n',
+            b'data: {"type":"client-tool-call","toolCallId":"tc1","toolName":"read_task_run","input":{"path":"/fake"}}\n\n',
+            b'data: {"type":"finish"}\n\n',
+        ]
+        second_response_chunks = [
+            b'data: {"type":"text-delta","delta":"Here is the result"}\n\n',
+            b'data: {"type":"finish"}\n\n',
+        ]
+
+        call_count = 0  # number of upstream requests issued by the proxy
+
+        def make_stream_mock(chunks):
+            async def mock_aiter_bytes():
+                for chunk in chunks:
+                    yield chunk
+
+            mock_upstream = MagicMock()
+            mock_upstream.status_code = 200
+            mock_upstream.aiter_bytes.return_value = mock_aiter_bytes()
+            mock_upstream.__aenter__ = AsyncMock(return_value=mock_upstream)
+            mock_upstream.__aexit__ = AsyncMock(return_value=None)
+            return mock_upstream
+
+        def side_effect_stream(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:  # first request gets the tool call, later ones the reply
+                return make_stream_mock(first_response_chunks)
+            return make_stream_mock(second_response_chunks)
+
+        mock_client = MagicMock()
+        mock_client.stream.side_effect = side_effect_stream
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=None)
+
+        mock_class = MagicMock(return_value=mock_client)
+
+        with (
+            patch("app.desktop.studio_server.chat_api.httpx.AsyncClient", mock_class),
+            patch(
+                "app.desktop.studio_server.chat_api._execute_client_tool",
+                return_value='{"data": "mock result"}',
+            ),
+        ):
+            response = client.post(
+                "/api/chat",
+                json={"messages": [{"role": "user", "content": "read my task run"}]},
+            )
+
+        assert response.status_code == 200
+        content = response.content
+        assert b"Let me read that" in content
+        assert b"Here is the result" in content
+        assert call_count == 2
+
+        continuation_call = mock_client.stream.call_args_list[1]  # second upstream POST
+        continuation_body = json.loads(continuation_call.kwargs["content"])
+        assert len(continuation_body["messages"]) == 2  # user + appended assistant
diff --git a/app/web_ui/package-lock.json b/app/web_ui/package-lock.json
index 4a092c346..434db91ae 100644
--- a/app/web_ui/package-lock.json
+++ b/app/web_ui/package-lock.json
@@ -10,8 +10,10 @@
       "dependencies": {
         "@floating-ui/dom": "^1.7.2",
         "@kinde-oss/kinde-auth-pkce-js": "^4.3.0",
+        "dompurify": "^3.3.2",
         "echarts": "^6.0.0",
         "highlight.js": "^11.10.0",
+        "marked": "^17.0.4",
         "openapi-fetch": "^0.12.2",
         "posthog-js": "^1.184.2"
       },
@@ -21,6 +23,7 @@
         "@sveltejs/kit": "^2.20.6",
         "@sveltejs/vite-plugin-svelte": "^3.1.1",
         "@tailwindcss/typography": "^0.5.13",
+        "@types/dompurify": "^3.0.5",
         "@typescript-eslint/eslint-plugin": "^8.46.1",
         "@typescript-eslint/parser": "^8.46.1",
         "autoprefixer": "^10.4.15",
@@ -2096,6 +2099,16 @@
       "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==",
       "dev": true
     },
+    "node_modules/@types/dompurify": {
+      "version": "3.0.5",
+      "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-3.0.5.tgz",
+      "integrity": "sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@types/trusted-types": "*"
+      }
+    },
     "node_modules/@types/estree": {
       "version": "1.0.6",
       "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz",
@@ -2134,9 +2147,8 @@
       "version": "2.0.7",
       "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz",
       "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==",
-      "dev": true,
-      "license": "MIT",
-      "optional": true
+      "devOptional": true,
+      "license": "MIT"
     },
     "node_modules/@typescript-eslint/eslint-plugin": {
       "version": "8.46.1",
@@ -3372,11 +3384,13 @@
       "dev": true
     },
     "node_modules/dompurify": {
-      "version": "3.3.0",
-      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.0.tgz",
-      "integrity": "sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==",
-      "dev": true,
+      "version": "3.3.2",
+      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.2.tgz",
+      "integrity": "sha512-6obghkliLdmKa56xdbLOpUZ43pAR6xFy1uOrxBaIDjT+yaRuuybLjGS9eVBoSR/UPU5fq3OXClEHLJNGvbxKpQ==",
       "license": "(MPL-2.0 OR Apache-2.0)",
+      "engines": {
+        "node": ">=20"
+      },
       "optionalDependencies": {
         "@types/trusted-types": "^2.0.7"
       }
@@ -4878,16 +4892,15 @@
       "license": "MIT"
     },
     "node_modules/marked": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
-      "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==",
-      "dev": true,
+      "version": "17.0.4",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-17.0.4.tgz",
+      "integrity": "sha512-NOmVMM+KAokHMvjWmC5N/ZOvgmSWuqJB8FoYI019j4ogb/PeRMKoKIjReZ2w3376kkA8dSJIP8uD993Kxc0iRQ==",
       "license": "MIT",
       "bin": {
         "marked": "bin/marked.js"
       },
       "engines": {
-        "node": ">= 12"
+        "node": ">= 20"
       }
     },
     "node_modules/math-intrinsics": {
@@ -6200,6 +6213,19 @@
         "styled-components": "^4.1.1 || ^5.1.1 || ^6.0.5"
       }
     },
+    "node_modules/redoc/node_modules/marked": {
+      "version": "4.3.0",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
+      "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==",
+      "dev": true,
+      "license": "MIT",
+      "bin": {
+        "marked": "bin/marked.js"
+      },
+      "engines": {
+        "node": ">= 12"
+      }
+    },
     "node_modules/reftools": {
       "version": "1.1.9",
       "resolved": "https://registry.npmjs.org/reftools/-/reftools-1.1.9.tgz",
diff --git a/app/web_ui/package.json b/app/web_ui/package.json
index 28038aa15..11ccd4f57 100644
--- a/app/web_ui/package.json
+++ b/app/web_ui/package.json
@@ -21,6 +21,7 @@
     "@sveltejs/kit": "^2.20.6",
     "@sveltejs/vite-plugin-svelte": "^3.1.1",
     "@tailwindcss/typography": "^0.5.13",
+    "@types/dompurify": "^3.0.5",
     "@typescript-eslint/eslint-plugin": "^8.46.1",
     "@typescript-eslint/parser": "^8.46.1",
     "autoprefixer": "^10.4.15",
@@ -43,9 +44,11 @@
   "type": "module",
   "dependencies": {
     "@floating-ui/dom": "^1.7.2",
-    "echarts": "^6.0.0",
     "@kinde-oss/kinde-auth-pkce-js": "^4.3.0",
+    "dompurify": "^3.3.2",
+    "echarts": "^6.0.0",
     "highlight.js": "^11.10.0",
+    "marked": "^17.0.4",
     "openapi-fetch": "^0.12.2",
     "posthog-js": "^1.184.2"
   },
diff --git a/app/web_ui/src/app.css b/app/web_ui/src/app.css
index 073088829..46c1c575d 100644
--- a/app/web_ui/src/app.css
+++ b/app/web_ui/src/app.css
@@ -28,3 +28,17 @@ input::-webkit-inner-spin-button {
 input[type="number"] {
   -moz-appearance: textfield;
 }
+
+@keyframes thinking-dot {
+  0%,
+  100% {
+    opacity: 0.35;
+  }
+  50% {
+    opacity: 1;
+  }
+}
+
+.thinking-dot {
+  animation: thinking-dot 0.8s ease-in-out infinite;
+}
diff --git a/app/web_ui/src/lib/chat/ChatMarkdown.svelte b/app/web_ui/src/lib/chat/ChatMarkdown.svelte
new file mode 100644
index 000000000..342a63c90
--- /dev/null
+++ b/app/web_ui/src/lib/chat/ChatMarkdown.svelte
@@ -0,0 +1,108 @@
+<script lang="ts">
+  import { Marked, type Token } from "marked"
+  import DOMPurify from "dompurify"
+  import hljs from "highlight.js/lib/core"
+  import json from "highlight.js/lib/languages/json"
+  import javascript from "highlight.js/lib/languages/javascript"
+  import typescript from "highlight.js/lib/languages/typescript"
+  import python from "highlight.js/lib/languages/python"
+  import bash from "highlight.js/lib/languages/bash"
+
+  hljs.registerLanguage("json", json)
+  hljs.registerLanguage("javascript", javascript)
+  hljs.registerLanguage("js", javascript)
+  hljs.registerLanguage("typescript", typescript)
+  hljs.registerLanguage("ts", typescript)
+  hljs.registerLanguage("python", python)
+  hljs.registerLanguage("py", python)
+  hljs.registerLanguage("bash", bash)
+  hljs.registerLanguage("shell", bash)
+  hljs.registerLanguage("sh", bash)
+
+  function escapeHtml(s: string): string {
+    return s
+      .replace(/&/g, "&amp;")
+      .replace(/</g, "&lt;")
+      .replace(/>/g, "&gt;")
+      .replace(/"/g, "&quot;")
+  }
+
+  // Use a private Marked instance rather than marked.use(): this script runs
+  // once per component instance, and marked.use() mutates the shared global
+  // parser, stacking another copy of these renderers on every mount.
+  const md = new Marked({
+    gfm: true,
+    renderer: {
+      code({ text, lang }: { text: string; lang?: string }) {
+        const code = text ?? ""
+        const language = (lang ?? "").toLowerCase()
+        let highlighted: string
+        // Highlight with the declared language when registered, otherwise
+        // auto-detect; fall back to escaped plain text if highlight.js throws.
+        try {
+          highlighted =
+            language && hljs.getLanguage(language)
+              ? hljs.highlight(code, { language }).value
+              : hljs.highlightAuto(code).value
+        } catch {
+          highlighted = escapeHtml(code)
+        }
+        return `<pre class="hljs rounded-lg bg-base-300/40 overflow-x-auto p-3 text-sm"><code>${highlighted}</code></pre>`
+      },
+      codespan({ text }: { text: string }) {
+        return `<code class="px-1.5 py-0.5 rounded bg-base-300/50 text-sm font-mono text-base-content/90">${escapeHtml(text ?? "")}</code>`
+      },
+      blockquote({ tokens }: { tokens: Token[] }) {
+        const inner = this.parser.parse(tokens)
+        return `<blockquote class="border-l-4 border-base-300 pl-4 my-2 text-base-content/80">${inner}</blockquote>`
+      },
+    },
+  })
+
+  const ALLOWED_TAGS = [
+    "p",
+    "br",
+    "strong",
+    "em",
+    "b",
+    "i",
+    "a",
+    "ul",
+    "ol",
+    "li",
+    "blockquote",
+    "pre",
+    "code",
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6",
+    "hr",
+    "span",
+    "div",
+  ]
+  const ALLOWED_ATTR = ["href", "target", "rel", "class"]
+
+  export let text: string = ""
+
+  $: rawHtml = text ? (md.parse(text, { async: false }) as string) : ""
+  $: sanitized = rawHtml
+    ? DOMPurify.sanitize(rawHtml, { ALLOWED_TAGS, ALLOWED_ATTR })
+    : ""
+</script>
+
+<svelte:head>
+  <link rel="stylesheet" href="/styles/highlightjs.min.css" />
+</svelte:head>
+
+{#if sanitized}
+  <div
+    class="chat-markdown prose prose-sm max-w-none prose-p:my-2 prose-ul:my-2 prose-ol:my-2 prose-li:my-0.5 prose-headings:mt-4 prose-headings:mb-2 prose-pre:my-2 prose-blockquote:my-2 prose-a:link"
+  >
+    {@html sanitized}
+  </div>
+{:else}
+  <span class="whitespace-pre-wrap">{text || ""}</span>
+{/if}
diff --git a/app/web_ui/src/lib/chat/streaming_chat.ts b/app/web_ui/src/lib/chat/streaming_chat.ts
new file mode 100644
index 000000000..b28f1718e
--- /dev/null
+++ b/app/web_ui/src/lib/chat/streaming_chat.ts
@@ -0,0 +1,338 @@
+/**
+ * Custom streaming chat: parses SSE from the backend (AI SDK protocol JSON events).
+ * Does not use @ai-sdk/svelte because we use Svelte 4 and @ai-sdk/svelte uses Svelte 5.
+ *
+ * There is an ancient version of the lib that works with Svelte 4, but then that forces us
+ * to use an old version of the protocol on the backend too, which is not a good idea.
+ */
+
+export type ChatMessagePart =
+  | { type: "text"; text: string }
+  | { type: "reasoning"; reasoning: string }
+  | {
+      type: `tool-${string}`
+      toolCallId: string
+      toolName?: string
+      input?: unknown
+      output?: unknown
+    }
+
+export interface ChatMessage {
+  id: string
+  role: "user" | "assistant" | "system"
+  content?: string
+  parts?: ChatMessagePart[]
+}
+
+/** Body the backend expects: POST /api/chat */
+export interface BackendChatRequest {
+  messages: Array<{
+    role: string
+    content?: string
+    parts?: Array<Record<string, unknown>>
+  }>
+}
+
+function toBackendMessage(m: ChatMessage): BackendChatRequest["messages"][0] {
+  // Parts are only sent for assistant messages that have at least one part.
+  const parts = m.parts ?? []
+  if (m.role !== "assistant" || parts.length === 0)
+    return { role: m.role, content: m.content ?? "" }
+  const backendParts = parts.map((part): Record<string, unknown> => {
+    switch (part.type) {
+      case "text":
+        return { type: "text", text: part.text }
+      case "reasoning":
+        return { type: "reasoning", reasoning: part.reasoning }
+      default:
+        return {
+          type: part.type,
+          toolCallId: part.toolCallId,
+          toolName: part.toolName,
+          input: part.input,
+          output: part.output,
+        }
+    }
+  })
+  return { role: "assistant", parts: backendParts }
+}
+
+/** SSE event from backend (AI SDK stream event shape) */
+interface StreamEvent {
+  type: string
+  delta?: string
+  id?: string
+  messageId?: string
+  toolCallId?: string
+  toolName?: string
+  input?: unknown
+  inputTextDelta?: string
+  output?: unknown
+  errorText?: string
+  messageMetadata?: { finishReason?: string; usage?: unknown }
+}
+
+export interface StreamChatOptions {
+  apiUrl: string // endpoint that receives the POST
+  messages: ChatMessage[] // full history; converted with toBackendMessage
+  onAssistantMessage: (update: (draft: ChatMessage) => void) => void
+  onFinish: () => void // called when the stream ends or the request is aborted
+  onError: (error: Error) => void // called on network, HTTP or read failure
+  signal?: AbortSignal // passed to fetch
+}
+
+function generateId(): string {
+  return `msg-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`
+}
+
+/**
+ * POST messages to apiUrl, parse the SSE reply and call onAssistantMessage for
+ * each event that updates the assistant reply. Calls onFinish when the stream
+ * ends or is aborted; calls onError on failure, including an in-band "error" event.
+ */
+export async function streamChat(options: StreamChatOptions): Promise<void> {
+  const { apiUrl, messages, onAssistantMessage, onFinish, onError, signal } =
+    options
+
+  const body: BackendChatRequest = {
+    messages: messages.map(toBackendMessage),
+  }
+
+  let response: Response
+  try {
+    response = await fetch(apiUrl, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(body),
+      signal,
+    })
+  } catch (err) {
+    if ((err as Error).name === "AbortError") {
+      onFinish()
+      return
+    }
+    onError(err instanceof Error ? err : new Error(String(err)))
+    return
+  }
+
+  if (!response.ok) {
+    const text = await response.text()
+    onError(
+      new Error(
+        `Chat API error ${response.status}: ${text || response.statusText}`,
+      ),
+    )
+    return
+  }
+
+  const reader = response.body?.getReader()
+  if (!reader) {
+    onError(new Error("No response body"))
+    return
+  }
+
+  const decoder = new TextDecoder()
+  let buffer = ""
+
+  type PartSlot =
+    | { kind: "text"; id: string }
+    | { kind: "reasoning"; id: string }
+    | { kind: "tool"; id: string }
+  const partOrder: PartSlot[] = []
+  const textBlocks = new Map<string, string>()
+  const reasoningBlocks = new Map<string, string>()
+  type ToolPart = Extract<ChatMessagePart, { toolCallId: string }>
+  const toolMap = new Map<string, ToolPart>()
+  const toolInputBuffer = new Map<string, string>()
+  let currentTextId: string | null = null
+  let currentReasoningId: string | null = null
+  let slotIdCounter = 0
+  function nextSlotId(): string {
+    slotIdCounter += 1
+    return `slot-${slotIdCounter}`
+  }
+
+  function flushAssistant() {
+    onAssistantMessage((draft) => {
+      const next: ChatMessagePart[] = []
+      for (const slot of partOrder) {
+        if (slot.kind === "text") {
+          const text = textBlocks.get(slot.id)
+          if (text) next.push({ type: "text", text })
+        } else if (slot.kind === "reasoning") {
+          const reasoning = reasoningBlocks.get(slot.id)
+          if (reasoning) next.push({ type: "reasoning", reasoning })
+        } else {
+          const tool = toolMap.get(slot.id)
+          if (tool) next.push(tool)
+        }
+      }
+      draft.parts = next
+    })
+  }
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read()
+      if (done) break
+      buffer += decoder.decode(value, { stream: true })
+      const lines = buffer.split("\n")
+      buffer = lines.pop() ?? ""
+      for (const line of lines) {
+        if (line.startsWith("data: ")) {
+          const payload = line.slice(6).trim()
+          if (payload === "[DONE]" || payload === "") continue
+          let event: StreamEvent
+          try {
+            event = JSON.parse(payload) as StreamEvent
+          } catch {
+            continue
+          }
+          // "finish" and "finish-step" are intentionally not terminal: the
+          // proxy can append further steps (client tool rounds) to this same
+          // response, so events are processed until the body itself ends.
+          const typ = event.type
+          if (
+            typ === "text-start" ||
+            (typ === "text-delta" && currentTextId === null)
+          ) {
+            if (typ === "text-start" && currentTextId !== null) {
+              currentTextId = null
+            }
+            if (currentTextId === null) {
+              const id = nextSlotId()
+              partOrder.push({ kind: "text", id })
+              currentTextId = id
+              textBlocks.set(id, "")
+            }
+          }
+          if (typ === "text-delta" && event.delta != null) {
+            if (currentTextId === null) {
+              const id = nextSlotId()
+              partOrder.push({ kind: "text", id })
+              currentTextId = id
+              textBlocks.set(id, "")
+            }
+            textBlocks.set(
+              currentTextId,
+              (textBlocks.get(currentTextId) ?? "") + event.delta,
+            )
+            flushAssistant()
+          } else if (typ === "text-end") {
+            currentTextId = null
+          } else if (
+            typ === "reasoning-start" ||
+            (typ === "reasoning-delta" && currentReasoningId === null)
+          ) {
+            if (typ === "reasoning-start" && currentReasoningId !== null) {
+              currentReasoningId = null
+            }
+            if (currentReasoningId === null) {
+              const id = nextSlotId()
+              partOrder.push({ kind: "reasoning", id })
+              currentReasoningId = id
+              reasoningBlocks.set(id, "")
+            }
+          }
+          if (typ === "reasoning-delta" && event.delta != null) {
+            if (currentReasoningId === null) {
+              const id = nextSlotId()
+              partOrder.push({ kind: "reasoning", id })
+              currentReasoningId = id
+              reasoningBlocks.set(id, "")
+            }
+            reasoningBlocks.set(
+              currentReasoningId,
+              (reasoningBlocks.get(currentReasoningId) ?? "") + event.delta,
+            )
+            flushAssistant()
+          } else if (typ === "reasoning-end") {
+            currentReasoningId = null
+          } else if (typ === "tool-input-start" && event.toolCallId) {
+            const key = event.toolCallId
+            if (!toolMap.has(key)) {
+              partOrder.push({ kind: "tool", id: key })
+              toolMap.set(key, {
+                type: `tool-${event.toolName ?? "unknown"}`,
+                toolCallId: event.toolCallId,
+                toolName: event.toolName,
+              })
+            }
+            flushAssistant()
+          } else if (
+            typ === "tool-input-delta" &&
+            event.toolCallId &&
+            event.inputTextDelta != null
+          ) {
+            const key = event.toolCallId
+            const prev = toolInputBuffer.get(key) ?? ""
+            toolInputBuffer.set(key, prev + event.inputTextDelta)
+            let entry = toolMap.get(key)
+            if (!entry) {
+              partOrder.push({ kind: "tool", id: key })
+              entry = {
+                type: `tool-${event.toolName ?? "unknown"}`,
+                toolCallId: event.toolCallId,
+                toolName: event.toolName,
+              }
+              toolMap.set(key, entry)
+            }
+            try {
+              entry.input = JSON.parse(
+                toolInputBuffer.get(key) ?? "{}",
+              ) as unknown
+            } catch {
+              entry.input = toolInputBuffer.get(key)
+            }
+            flushAssistant()
+          } else if (typ === "tool-input-available" && event.toolCallId) {
+            const key = event.toolCallId
+            let entry = toolMap.get(key)
+            if (!entry) {
+              partOrder.push({ kind: "tool", id: key })
+              entry = {
+                type: `tool-${event.toolName ?? "unknown"}`,
+                toolCallId: event.toolCallId,
+                toolName: event.toolName,
+                input: event.input,
+              }
+              toolMap.set(key, entry)
+            } else {
+              entry.input = event.input
+            }
+            toolInputBuffer.delete(key)
+            flushAssistant()
+          } else if (typ === "tool-output-available" && event.toolCallId) {
+            const entry = toolMap.get(event.toolCallId)
+            if (entry) {
+              entry.output = event.output
+              flushAssistant()
+            }
+          } else if (typ === "tool-output-error" && event.toolCallId) {
+            const entry = toolMap.get(event.toolCallId)
+            if (entry) {
+              entry.output = { error: event.errorText }
+              flushAssistant()
+            }
+          } else if (typ === "error") {
+            // Failures are reported in-band as an "error" event; throw so the
+            // catch below routes them to onError instead of dropping them.
+            const failure = event as { errorText?: string; message?: string }
+            throw new Error(
+              failure.errorText ?? failure.message ?? "Chat request failed.",
+            )
+          }
+        }
+      }
+    }
+    onFinish()
+  } catch (err) {
+    if ((err as Error).name === "AbortError") {
+      onFinish()
+      return
+    }
+    onError(err instanceof Error ? err : new Error(String(err)))
+  }
+}
+
+export { generateId as chatGenerateId }
diff --git a/app/web_ui/src/lib/ui/icons/arrow_up_icon.svelte b/app/web_ui/src/lib/ui/icons/arrow_up_icon.svelte
new file mode 100644
index 000000000..a8a9d1153
--- /dev/null
+++ b/app/web_ui/src/lib/ui/icons/arrow_up_icon.svelte
@@ -0,0 +1,15 @@
+<svg
+  class="w-full h-full"
+  viewBox="0 0 24 24"
+  fill="none"
+  xmlns="http://www.w3.org/2000/svg"
+  aria-hidden="true"
+>
+  <path
+    d="M12 20L12 4M12 4L18 10M12 4L6 10"
+    stroke="currentColor"
+    stroke-width="1.5"
+    stroke-linecap="round"
+    stroke-linejoin="round"
+  />
+</svg>
diff --git a/app/web_ui/src/lib/ui/icons/stop_icon.svelte b/app/web_ui/src/lib/ui/icons/stop_icon.svelte
new file mode 100644
index 000000000..77bab00a8
--- /dev/null
+++ b/app/web_ui/src/lib/ui/icons/stop_icon.svelte
@@ -0,0 +1,13 @@
+<svg
+  class="w-full h-full"
+  viewBox="0 0 24 24"
+  fill="none"
+  xmlns="http://www.w3.org/2000/svg"
+  aria-hidden="true"
+>
+  <path
+    d="M2 12C2 7.28595 2 4.92893 3.46447 3.46447C4.92893 2 7.28595 2 12 2C16.714 2 19.0711 2 20.5355 3.46447C22 4.92893 22 7.28595 22 12C22 16.714 22 19.0711 20.5355 20.5355C19.0711 22 16.714 22 12 22C7.28595 22 4.92893 22 3.46447 20.5355C2 19.0711 2 16.714 2 12Z"
+    stroke="currentColor"
+    stroke-width="1.5"
+  />
+</svg>
diff --git a/app/web_ui/src/routes/(app)/+layout.svelte b/app/web_ui/src/routes/(app)/+layout.svelte
index 661516a6d..5bb004de7 100644
--- a/app/web_ui/src/routes/(app)/+layout.svelte
+++ b/app/web_ui/src/routes/(app)/+layout.svelte
@@ -30,6 +30,7 @@
     Specs,
     Generate,
     Run,
+    Chat,
     FineTune,
     Models,
     Optimize,
@@ -67,6 +68,8 @@
       section = Section.Specs
     } else if (path_start("/optimize", $page.url.pathname)) {
       section = Section.Optimize
+    } else if (path_start("/chat", $page.url.pathname)) {
+      section = Section.Chat
     } else {
       section = Section.None
     }
@@ -174,6 +177,25 @@
           Run</a
         >
       </li>
+      <li class="menu-md">
+        <a href="/chat" class={section == Section.Chat ? "active" : ""}>
+          <svg
+            class="w-6 h-6 mr-2"
+            viewBox="0 0 24 24"
+            fill="none"
+            xmlns="http://www.w3.org/2000/svg"
+          >
+            <path
+              d="M8 12H8.01M12 12H12.01M16 12H16.01M21 12C21 16.4183 16.9706 20 12 20C10.4607 20 9.01172 19.6565 7.74467 19.0511L3 20L4.39499 16.28C3.51156 15.0424 3 13.5743 3 12C3 7.58172 7.02944 4 12 4C16.9706 4 21 7.58172 21 12Z"
+              stroke="currentColor"
+              stroke-width="1.5"
+              stroke-linecap="round"
+              stroke-linejoin="round"
+            />
+          </svg>
+          Chat</a
+        >
+      </li>
       <li class="menu-md">
         <a
           href={`/dataset/${$ui_state.current_project_id}/${$ui_state.current_task_id}`}
diff --git a/app/web_ui/src/routes/(app)/chat/+page.svelte b/app/web_ui/src/routes/(app)/chat/+page.svelte
new file mode 100644
index 000000000..d53aff8e7
--- /dev/null
+++ b/app/web_ui/src/routes/(app)/chat/+page.svelte
@@ -0,0 +1,577 @@
+<script lang="ts">
+  import { onMount, onDestroy } from "svelte"
+  import { fly } from "svelte/transition"
+  import AppPage from "../app_page.svelte"
+  import {
+    streamChat,
+    chatGenerateId,
+    type ChatMessage,
+    type ChatMessagePart,
+  } from "$lib/chat/streaming_chat"
+  import ChatMarkdown from "$lib/chat/ChatMarkdown.svelte"
+  import ArrowUpIcon from "$lib/ui/icons/arrow_up_icon.svelte"
+  import StopIcon from "$lib/ui/icons/stop_icon.svelte"
+  import { base_url } from "$lib/api_client"
+
+  const CHAT_API_URL = `${base_url}/api/chat`
+
+  let messages: ChatMessage[] = []
+  let input = ""
+  let status: "ready" | "submitted" | "streaming" | "error" = "ready"
+  let error: Error | null = null
+  let abortController: AbortController | null = null
+  let messagesContainer: HTMLDivElement | null = null
+  let messagesEndRef: HTMLDivElement | null = null
+  let scrollObserver: MutationObserver | null = null
+  let textareaRef: HTMLTextAreaElement | null = null
+  let collapsedPartKeys: Record<string, boolean> = {}
+  let reasoningPartStartTimes: Record<string, number> = {}
+  let reasoningPartEndTimes: Record<string, number> = {}
+  let lastSeenLastPartKey: string | null = null
+
+  $: isLoading = status === "submitted" || status === "streaming"
+
+  $: lastMessage = messages[messages.length - 1]
+  $: lastParts = lastMessage?.parts ?? []
+  $: lastPartKey =
+    lastParts.length > 0 && lastMessage
+      ? partKey(
+          lastMessage,
+          lastParts[lastParts.length - 1],
+          lastParts.length - 1,
+        )
+      : null
+
+  // Compare-then-store lives in one function so Svelte cannot order the store ahead of the comparison.
+  $: noteLastPartChange(lastPartKey)
+  function noteLastPartChange(key: string | null): void {
+    const prev = lastSeenLastPartKey
+    lastSeenLastPartKey = key
+    if (prev != null && prev !== key) reasoningPartEndTimes = { ...reasoningPartEndTimes, [prev]: Date.now() }
+  }
+
+  $: if (
+    status === "ready" &&
+    lastPartKey != null &&
+    !(lastPartKey in reasoningPartEndTimes)
+  ) {
+    reasoningPartEndTimes = {
+      ...reasoningPartEndTimes,
+      [lastPartKey]: Date.now(),
+    }
+  }
+
+  $: {
+    let updated = false
+    const next = { ...reasoningPartStartTimes }
+    for (const message of messages) {
+      const parts = message.parts ?? []
+      for (let i = 0; i < parts.length; i++) {
+        const part = parts[i]
+        if (part.type === "reasoning") {
+          const key = partKey(message, part, i)
+          if (!(key in next)) {
+            next[key] = Date.now()
+            updated = true
+          }
+        }
+      }
+    }
+    if (updated) reasoningPartStartTimes = next
+  }
+
+  function reasoningDurationSeconds(key: string): number | null {
+    // Whole seconds from the recorded start to the recorded end (or to now if no end yet); null without a start.
+    const startedAt = reasoningPartStartTimes[key]
+    if (startedAt == null) return null
+    const finishedAt = reasoningPartEndTimes[key] ?? Date.now()
+    return Math.max(0, Math.round((finishedAt - startedAt) / 1000))
+  }
+
+  function durationLabel(seconds: number): string {
+    return `${seconds} second${seconds === 1 ? "" : "s"}` // singular only for exactly 1
+  }
+
+  $: showStreamingCursor =
+    isLoading && lastMessage?.role === "assistant" && lastParts.length === 0
+
+  function isReasoningStreaming(
+    message: ChatMessage,
+    partIndex: number,
+    parts: ChatMessagePart[],
+  ): boolean {
+    // True only for the final part of the final message while status is "streaming".
+    if (status !== "streaming" || partIndex !== parts.length - 1) return false
+    const newest = messages[messages.length - 1]
+    return messages.length > 0 && message.id === newest?.id
+  }
+
+  function partKey(
+    message: ChatMessage,
+    part: ChatMessagePart,
+    partIndex: number,
+  ): string {
+    // Reasoning and generic parts are keyed by index; tool parts by their toolCallId.
+    const prefix = message.id
+    if (part.type === "reasoning") return `${prefix}-reasoning-${partIndex}`
+    const isToolPart =
+      typeof part.type === "string" &&
+      part.type.startsWith("tool-") &&
+      "toolCallId" in part
+    if (isToolPart) return `${prefix}-tool-${(part as { toolCallId: string }).toolCallId}`
+    return `${prefix}-part-${partIndex}`
+  }
+
+  function shouldAutoCollapse(
+    message: ChatMessage,
+    partIndex: number,
+    parts: ChatMessagePart[],
+  ): boolean {
+    // Collapse by default unless this is the last part of the last message
+    // and the response is still streaming.
+    const newestId = messages[messages.length - 1]?.id
+    const inLastMessage = messages.length > 0 && message.id === newestId
+    const atLastPart = partIndex === parts.length - 1
+    return !(inLastMessage && status === "streaming" && atLastPart)
+  }
+
+  function isPartCollapsed(
+    state: Record<string, boolean>,
+    message: ChatMessage,
+    part: ChatMessagePart,
+    partIndex: number,
+    parts: ChatMessagePart[],
+  ): boolean {
+    // An explicit entry in `state` wins; otherwise fall back to the auto-collapse default.
+    const id = partKey(message, part, partIndex)
+    return id in state ? state[id] : shouldAutoCollapse(message, partIndex, parts)
+  }
+
+  function togglePartCollapsed(
+    message: ChatMessage,
+    part: ChatMessagePart,
+    partIndex: number,
+  ): void {
+    // Flip the part's effective collapsed state (explicit or auto default) and store it under its key.
+    const wasCollapsed = isPartCollapsed(
+      collapsedPartKeys,
+      message,
+      part,
+      partIndex,
+      message.parts ?? [],
+    )
+    const id = partKey(message, part, partIndex)
+    collapsedPartKeys = { ...collapsedPartKeys, [id]: !wasCollapsed }
+  }
+
+  function formatToolName(type: string): string {
+    // Drop a leading "tool-", swap "_" for spaces, upper-case each word's first character.
+    return type.replace(/^tool-/, "").replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase())
+  }
+
+  function hasToolInput(part: ChatMessagePart): boolean {
+    // Missing/undefined input -> false; null and non-object values -> true; objects only when they have keys.
+    const value = "input" in part ? part.input : undefined
+    return value !== undefined && (typeof value !== "object" || value === null || Object.keys(value).length > 0)
+  }
+
+  function formatToolInput(input: unknown): string {
+    return typeof input !== "string" ? JSON.stringify(input, null, 2) : input // strings pass through verbatim
+  }
+
+  function formatToolOutput(output: unknown): string {
+    // Pretty-print the value as 2-space-indented JSON. A string is first
+    // parsed as JSON and re-serialized; when it does not parse, the
+    // original string is returned unchanged.
+    if (typeof output !== "string") return JSON.stringify(output, null, 2)
+    try {
+      return JSON.stringify(JSON.parse(output), null, 2)
+    } catch {
+      return output
+    }
+  }
+
+  function getToolOutputError(part: ChatMessagePart): string {
+    // Return output.error when the part's output is a non-null object
+    // carrying a string `error`; every other shape yields the generic
+    // "Error" label.
+    const fallback = "Error"
+    if (!("output" in part)) return fallback
+    const out = part.output
+    if (typeof out !== "object" || out === null) return fallback
+    if (!("error" in out)) return fallback
+    const message = (out as { error?: unknown }).error
+    return typeof message === "string" ? message : fallback
+  }
+
+  onMount(() => {
+    // Keep the newest content in view: whenever anything inside the message
+    // list mutates, scroll the end sentinel into view.
+    if (!messagesContainer || !messagesEndRef) return
+    const sentinel = messagesEndRef
+    scrollObserver = new MutationObserver(() => {
+      sentinel.scrollIntoView({ block: "end", behavior: "auto" })
+    })
+    scrollObserver.observe(messagesContainer, {
+      childList: true,
+      subtree: true,
+      attributes: true,
+      characterData: true,
+    })
+  })
+
+  onDestroy(() => {
+    abortController?.abort() // cancel any in-flight stream so it does not keep running after the page is gone
+    scrollObserver?.disconnect() // stop observing the message list; the reference dies with the component
+  })
+
+  function handleTextareaKeydown(e: KeyboardEvent): void {
+    // Enter submits, Shift+Enter inserts a newline; an Enter that only confirms an IME composition is left alone.
+    if (e.key !== "Enter" || e.shiftKey || e.isComposing) return
+    e.preventDefault()
+    if (!isLoading && input.trim()) handleSubmit()
+  }
+
+  function adjustTextareaHeight(e?: Event): void { // Resize the textarea to fit its content.
+    const el = (e?.currentTarget as HTMLTextAreaElement) ?? textareaRef // the event's target, else the bound textarea
+    if (!el) return
+    el.style.height = "auto" // presumably reset so scrollHeight is measured without the previous height — confirm
+    el.style.height = `${Math.min(el.scrollHeight + 2, window.innerHeight * 0.4)}px` // capped at 40% of the window height
+  }
+
+  function stop() {
+    // Abort the in-flight request, if any. The controller itself is
+    // cleared by the onFinish/onError callbacks passed to streamChat.
+    abortController?.abort()
+  }
+
+  function updateLastAssistant(update: (draft: ChatMessage) => void) {
+    // Apply `update` to a copy of the trailing assistant message and swap it in; no-op for any other tail.
+    const tail = messages[messages.length - 1]
+    if (tail?.role !== "assistant") return
+    const draft = { ...tail, parts: tail.parts ? [...tail.parts] : [] }
+    update(draft)
+    messages = [...messages.slice(0, -1), draft]
+  }
+
+  function handleSubmit(e?: Event) { // Send the current input as a user message and start streaming the reply.
+    if (e) e.preventDefault()
+    const text = input.trim()
+    if (!text || isLoading) return // ignore blank input and submits while a response is in flight
+    error = null
+    const userMessage: ChatMessage = {
+      id: chatGenerateId(),
+      role: "user",
+      content: text,
+    }
+    const assistantMessage: ChatMessage = { // empty placeholder, filled in through onAssistantMessage
+      id: chatGenerateId(),
+      role: "assistant",
+      parts: [],
+    }
+    messages = [...messages, userMessage, assistantMessage]
+    input = ""
+    status = "submitted"
+    setTimeout(() => adjustTextareaHeight(), 0) // re-measure the textarea after the input has been cleared
+    abortController = new AbortController()
+    // Not awaited: progress, completion and failure are all reported through the callbacks below.
+    streamChat({
+      apiUrl: CHAT_API_URL,
+      messages: messages.slice(0, -1), // history without the empty assistant placeholder
+      onAssistantMessage: (update) => {
+        status = "streaming"
+        updateLastAssistant(update)
+      },
+      onFinish: () => {
+        status = "ready"
+        abortController = null
+      },
+      onError: (err) => {
+        status = "error"
+        error = err
+        abortController = null
+      },
+      signal: abortController.signal,
+    })
+  }
+</script>
+
+<AppPage
+  title="Chat"
+  subtitle="Streaming chat"
+  limit_max_width={true}
+  no_y_padding={true}
+>
+  <div
+    class="flex flex-col h-[calc(100vh-14rem)] overflow-hidden w-full md:max-w-3xl mx-auto px-4"
+  >
+    {#if error}
+      <div
+        class="flex-none rounded-lg bg-error/10 border border-error/30 p-3 text-error text-sm"
+      >
+        {error?.message}
+      </div>
+    {/if}
+
+    <div
+      bind:this={messagesContainer}
+      class="chat-messages-scroll flex-1 min-h-0 flex flex-col gap-4 overflow-y-auto overflow-x-hidden"
+      role="log"
+      aria-live="polite"
+    >
+      {#each messages as message (message.id)}
+        <div
+          in:fly={{ y: 8, duration: 200 }}
+          out:fly={{ y: -4, duration: 150 }}
+          class={message.role === "user"
+            ? "rounded-xl bg-base-content/[0.06] px-3 py-2.5 max-w-2xl ml-auto"
+            : "flex flex-col gap-3"}
+        >
+          <div class="flex flex-col gap-3">
+            {#if message.parts && message.parts.length > 0}
+              {#each message.parts as part, partIndex (partKey(message, part, partIndex))}
+                {#if part.type === "text"}
+                  <ChatMarkdown text={part.text ?? ""} />
+                {:else if part.type === "reasoning"}
+                  {@const collapsed = isPartCollapsed(
+                    collapsedPartKeys,
+                    message,
+                    part,
+                    partIndex,
+                    message.parts ?? [],
+                  )}
+                  {@const key = partKey(message, part, partIndex)}
+                  {@const streaming = isReasoningStreaming(
+                    message,
+                    partIndex,
+                    message.parts ?? [],
+                  )}
+                  {@const duration = reasoningDurationSeconds(key)}
+                  <div
+                    class="mt-2 overflow-hidden text-sm text-base-content/60"
+                  >
+                    <button
+                      type="button"
+                      class="group/btn w-full flex items-center gap-1.5 py-1 text-left text-base-content/60 hover:text-base-content/80 transition-colors cursor-pointer"
+                      on:click={() =>
+                        togglePartCollapsed(message, part, partIndex)}
+                    >
+                      <span class="flex items-center gap-1.5 min-w-0">
+                        {#if streaming}
+                          <span class="inline-flex items-baseline gap-px">
+                            Thinking
+                            <span
+                              class="thinking-dot"
+                              style="animation-delay: 0ms">.</span
+                            ><span
+                              class="thinking-dot"
+                              style="animation-delay: 160ms">.</span
+                            ><span
+                              class="thinking-dot"
+                              style="animation-delay: 320ms">.</span
+                            >
+                          </span>
+                        {:else}
+                          <span
+                            ><span class="font-semibold">Thought</span>
+                            {#if duration != null}
+                              for {durationLabel(duration)}
+                            {:else}
+                              …
+                            {/if}</span
+                          >
+                        {/if}
+                        {#if collapsed}
+                          <span
+                            class="shrink-0 text-base-content/40 transition-opacity opacity-0 group-hover/btn:opacity-100"
+                            aria-hidden="true">▶</span
+                          >
+                        {:else}
+                          <span
+                            class="shrink-0 text-base-content/40"
+                            aria-hidden="true">▼</span
+                          >
+                        {/if}
+                      </span>
+                    </button>
+                    {#if !collapsed}
+                      <div class="pt-1">
+                        <ChatMarkdown text={part.reasoning ?? ""} />
+                      </div>
+                    {/if}
+                  </div>
+                {:else if typeof part.type === "string" && part.type.startsWith("tool-")}
+                  {@const toolCollapsed = isPartCollapsed(
+                    collapsedPartKeys,
+                    message,
+                    part,
+                    partIndex,
+                    message.parts ?? [],
+                  )}
+                  {@const hasOutput = part.output !== undefined}
+                  {@const hasError =
+                    hasOutput &&
+                    typeof part.output === "object" &&
+                    part.output !== null &&
+                    "error" in part.output}
+                  <div class="mt-2 overflow-hidden text-sm">
+                    <button
+                      type="button"
+                      class="group/btn w-full flex items-center gap-1.5 py-1 text-left text-base-content/60 hover:text-base-content/80 transition-colors cursor-pointer"
+                      on:click={() =>
+                        togglePartCollapsed(message, part, partIndex)}
+                    >
+                      <span class="flex items-center gap-1.5">
+                        {formatToolName(part.type)} was called
+                        {#if toolCollapsed}
+                          <span
+                            class="shrink-0 text-base-content/40 transition-opacity opacity-0 group-hover/btn:opacity-100"
+                            aria-hidden="true">▶</span
+                          >
+                        {:else}
+                          <span
+                            class="shrink-0 text-base-content/40"
+                            aria-hidden="true">▼</span
+                          >
+                        {/if}
+                      </span>
+                    </button>
+                    {#if !toolCollapsed}
+                      <div
+                        class="mt-2 overflow-hidden rounded-md {hasError
+                          ? 'bg-error/5 text-error'
+                          : 'bg-base-content/[0.04]'}"
+                      >
+                        <div class="px-3 py-2.5 flex flex-col gap-2.5">
+                          <div>
+                            <span
+                              class="text-base-content/50 text-xs font-medium"
+                              >Input</span
+                            >
+                            <div class="mt-0.5">
+                              {#if hasToolInput(part)}
+                                <pre
+                                  class="text-xs overflow-x-auto rounded py-1.5 font-mono text-base-content/80">{formatToolInput(
+                                    part.input,
+                                  )}</pre>
+                              {:else}
+                                <span
+                                  class="text-base-content/50 italic text-xs"
+                                  >Calling…</span
+                                >
+                              {/if}
+                            </div>
+                          </div>
+                          <div>
+                            <span
+                              class="text-base-content/50 text-xs font-medium"
+                              >Output</span
+                            >
+                            <div class="mt-0.5">
+                              {#if hasError}
+                                <div class="text-xs">
+                                  {getToolOutputError(part)}
+                                </div>
+                              {:else if hasOutput}
+                                <pre
+                                  class="text-xs overflow-x-auto rounded py-1.5 font-mono text-base-content/80">{formatToolOutput(
+                                    part.output,
+                                  )}</pre>
+                              {:else}
+                                <div
+                                  class="flex items-center gap-2 text-base-content/50 italic text-xs"
+                                >
+                                  <span
+                                    class="inline-block w-3 h-3 rounded-full border border-base-content/30 border-t-base-content/60 animate-spin"
+                                  />
+                                  <span>…</span>
+                                </div>
+                              {/if}
+                            </div>
+                          </div>
+                        </div>
+                      </div>
+                    {/if}
+                  </div>
+                {/if}
+              {/each}
+            {:else if message.role === "assistant" && showStreamingCursor && message.id === lastMessage?.id}
+              <div class="flex items-center py-0.5" aria-hidden="true">
+                <span
+                  class="inline-block w-2 h-2 rounded-full bg-base-content/60 animate-pulse"
+                  style="animation-duration: 1.2s"
+                />
+              </div>
+            {:else if message.content}
+              <div class="whitespace-pre-wrap">{message.content}</div>
+            {/if}
+          </div>
+        </div>
+      {/each}
+      <div
+        bind:this={messagesEndRef}
+        class="shrink-0 min-w-[24px] min-h-[24px]"
+        aria-hidden="true"
+      />
+    </div>
+
+    <form
+      class="flex-none relative w-full pt-2 pb-3"
+      on:submit|preventDefault={handleSubmit}
+    >
+      <textarea
+        bind:this={textareaRef}
+        class="input input-bordered w-full min-h-[80px] max-h-[40vh] resize-none overflow-y-auto py-3 pr-12"
+        placeholder="Type a message…"
+        bind:value={input}
+        disabled={isLoading}
+        rows={3}
+        on:input={() => adjustTextareaHeight()}
+        on:keydown={handleTextareaKeydown}
+      />
+      {#if isLoading}
+        <button
+          type="button"
+          class="absolute right-3 bottom-6 flex size-8 items-center justify-center rounded-full bg-base-300 text-base-content hover:opacity-90 transition-opacity"
+          on:click={stop}
+          aria-label="Stop"
+        >
+          <span class="size-4 block"><StopIcon /></span>
+        </button>
+      {:else}
+        <button
+          type="submit"
+          class="absolute right-3 bottom-6 flex size-8 items-center justify-center rounded-full bg-primary text-primary-content hover:opacity-90 disabled:bg-base-300 disabled:text-base-content/40 disabled:pointer-events-none transition-colors"
+          disabled={!input.trim()}
+          aria-label="Send"
+        >
+          <span class="size-4 block"><ArrowUpIcon /></span>
+        </button>
+      {/if}
+    </form>
+  </div>
+</AppPage>
+
+<style>
+  .chat-messages-scroll::-webkit-scrollbar {
+    width: 6px;
+  }
+
+  .chat-messages-scroll::-webkit-scrollbar-track {
+    background: transparent;
+  }
+
+  .chat-messages-scroll::-webkit-scrollbar-thumb {
+    background-color: oklch(var(--bc) / 0.2);
+    border-radius: 3px;
+  }
+
+  .chat-messages-scroll::-webkit-scrollbar-thumb:hover {
+    background-color: oklch(var(--bc) / 0.35);
+  }
+
+  .chat-messages-scroll {
+    scrollbar-width: thin;
+    scrollbar-color: oklch(var(--bc) / 0.2) transparent;
+  }
+</style>
diff --git a/libs/core/kiln_ai/adapters/chat/chat_formatter.py b/libs/core/kiln_ai/adapters/chat/chat_formatter.py
index 22ba371a3..2c3b4c264 100644
--- a/libs/core/kiln_ai/adapters/chat/chat_formatter.py
+++ b/libs/core/kiln_ai/adapters/chat/chat_formatter.py
@@ -256,6 +256,9 @@ class MultiturnFormatter(ChatFormatter):
     Takes prior_trace (existing conversation) and appends the new user message.
     Produces a single turn: the new user message. Tool calls and multi-turn
     model responses are handled by _run_model_turn's internal loop.
+
+    When the prior_trace ends with a tool result, the formatter skips adding
+    a user message so the model can respond directly to the tool output.
     """
 
     def __init__(
@@ -270,16 +273,22 @@ def __init__(
         )
         self._prior_trace = prior_trace
 
+    def _is_tool_continuation(self) -> bool:
+        """Report whether the prior trace ends with a dict message whose role is "tool"."""
+        trace = self._prior_trace
+        tail = trace[-1] if trace else None
+        return isinstance(tail, dict) and tail.get("role") == "tool"
+
     def initial_messages(self) -> list[ChatCompletionMessageIncludingLiteLLM]:
         """Messages to seed the conversation (prior trace)."""
         return list(self._prior_trace)
 
     def next_turn(self, previous_output: str | None = None) -> Optional[ChatTurn]:
         if self._state == "start":
-            # prior trace is already in the messages list and contains system and so on, we only need
-            # to append the latest new user message
-            user_msg = BasicChatMessage("user", format_user_message(self.user_input))
             self._state = "awaiting_final"
+            if self._is_tool_continuation():
+                return ChatTurn(messages=[], final_call=True)
+            user_msg = BasicChatMessage("user", format_user_message(self.user_input))
             self._messages.append(user_msg)
             return ChatTurn(messages=[user_msg], final_call=True)
 
diff --git a/libs/core/kiln_ai/adapters/chat/test_chat_formatter.py b/libs/core/kiln_ai/adapters/chat/test_chat_formatter.py
index 2903b6eee..a15c5a9b9 100644
--- a/libs/core/kiln_ai/adapters/chat/test_chat_formatter.py
+++ b/libs/core/kiln_ai/adapters/chat/test_chat_formatter.py
@@ -190,6 +190,41 @@ def test_multiturn_formatter_preserves_tool_call_messages():
     assert first.final_call
 
 
+def test_multiturn_formatter_tool_continuation_skips_user_message():
+    prior_trace = [
+        {"role": "system", "content": "You are helpful."},
+        {"role": "user", "content": "read task run 123"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_xyz",
+                    "function": {
+                        "arguments": '{"task_run_id": "123"}',
+                        "name": "read_task_run",
+                    },
+                    "type": "function",
+                }
+            ],
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_xyz",
+            "content": '{"id": "123", "input": "test"}',
+        },
+    ]
+    formatter = MultiturnFormatter(prior_trace=prior_trace, user_input="")
+    assert formatter.initial_messages() == prior_trace
+    # The trace ends with a tool message, so the single turn must carry no new messages and be final.
+    first = formatter.next_turn()
+    assert first is not None
+    assert len(first.messages) == 0
+    assert first.final_call
+
+    assert formatter.next_turn("Here is your task run.") is None
+
+
 def test_format_user_message():
     # String
     assert format_user_message("test input") == "test input"
diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
index d9633ac81..61fa62092 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
@@ -55,6 +55,7 @@
     set_agent_run_id,
 )
 from kiln_ai.tools import KilnToolInterface
+from kiln_ai.tools.client_tool import ClientToolCallRequired
 from kiln_ai.tools.mcp_session_manager import MCPSessionManager
 from kiln_ai.tools.tool_registry import tool_from_id
 from kiln_ai.utils.config import Config
@@ -670,7 +671,8 @@ class AiSdkStreamResult:
     """Async-iterable wrapper around the AI SDK streaming flow.
 
     Yields ``AiSdkStreamEvent`` instances.  After iteration the resulting
-    ``TaskRun`` is available via the ``.task_run`` property.
+    ``TaskRun`` is available via the ``.task_run`` property (unless
+    ``.client_tool_pending`` is ``True``).
     """
 
     def __init__(
@@ -685,10 +687,16 @@ def __init__(
         self._input_source = input_source
         self._prior_trace = prior_trace
         self._task_run: TaskRun | None = None
+        self.client_tool_pending: bool = False
 
     @property
     def task_run(self) -> TaskRun:
         if self._task_run is None:
+            if self.client_tool_pending:
+                raise RuntimeError(
+                    "No task_run available: stream ended with a client tool call. "
+                    "Check .client_tool_pending before accessing .task_run"
+                )
             raise RuntimeError(
                 "Stream has not been fully consumed yet. "
                 "Iterate over the stream before accessing .task_run"
@@ -713,17 +721,32 @@ async def __aiter__(self) -> AsyncIterator[AiSdkStreamEvent]:
             yield AiSdkStreamEvent(AiSdkEventType.START_STEP)
 
             last_event_was_tool_call = False
-            async for event in adapter_stream:
-                if isinstance(event, ModelResponseStream):
-                    if last_event_was_tool_call:
-                        converter.reset_for_next_step()
-                        last_event_was_tool_call = False
-                    for ai_event in converter.convert_chunk(event):
-                        yield ai_event
-                elif isinstance(event, ToolCallEvent):
-                    last_event_was_tool_call = True
-                    for ai_event in converter.convert_tool_event(event):
-                        yield ai_event
+            try:
+                async for event in adapter_stream:
+                    if isinstance(event, ModelResponseStream):
+                        if last_event_was_tool_call:
+                            converter.reset_for_next_step()
+                            last_event_was_tool_call = False
+                        for ai_event in converter.convert_chunk(event):
+                            yield ai_event
+                    elif isinstance(event, ToolCallEvent):
+                        last_event_was_tool_call = True
+                        for ai_event in converter.convert_tool_event(event):
+                            yield ai_event
+            except ClientToolCallRequired as e:
+                self.client_tool_pending = True
+                yield AiSdkStreamEvent(
+                    AiSdkEventType.CLIENT_TOOL_CALL,
+                    {
+                        "toolCallId": e.tool_call_id,
+                        "toolName": e.tool_name,
+                        "input": e.arguments,
+                    },
+                )
+                for ai_event in converter.finalize():
+                    yield ai_event
+                yield AiSdkStreamEvent(AiSdkEventType.FINISH_STEP)
+                return
 
             for ai_event in converter.finalize():
                 yield ai_event
diff --git a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
index cebccdf57..f0c9abc45 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
@@ -679,7 +679,13 @@ async def process_tool_calls(
             async def run_tool_and_format(
                 t=tool, c=context, args=parsed_args, tc_id=tool_call.id
             ):
-                result = await t.run(c, **args)
+                from kiln_ai.tools.client_tool import ClientToolCallRequired
+
+                try:
+                    result = await t.run(c, **args)
+                except ClientToolCallRequired as e:
+                    e.tool_call_id = tc_id
+                    raise
                 return ChatCompletionToolMessageParamWrapper(
                     role="tool",
                     tool_call_id=tc_id,
diff --git a/libs/core/kiln_ai/adapters/model_adapters/stream_events.py b/libs/core/kiln_ai/adapters/model_adapters/stream_events.py
index fbe2ff230..e133e867a 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/stream_events.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/stream_events.py
@@ -30,6 +30,8 @@ class AiSdkEventType(str, Enum):
     TOOL_OUTPUT_AVAILABLE = "tool-output-available"
     TOOL_OUTPUT_ERROR = "tool-output-error"
 
+    CLIENT_TOOL_CALL = "client-tool-call"
+
     START_STEP = "start-step"
     FINISH_STEP = "finish-step"
 
diff --git a/libs/core/kiln_ai/datamodel/tool_id.py b/libs/core/kiln_ai/datamodel/tool_id.py
index 0d1e34f7a..c7eedf33a 100644
--- a/libs/core/kiln_ai/datamodel/tool_id.py
+++ b/libs/core/kiln_ai/datamodel/tool_id.py
@@ -32,6 +32,7 @@ class KilnBuiltInToolId(str, Enum):
 RAG_TOOL_ID_PREFIX = "kiln_tool::rag::"
 MCP_LOCAL_TOOL_ID_PREFIX = "mcp::local::"
 KILN_TASK_TOOL_ID_PREFIX = "kiln_task::"
+CLIENT_TOOL_ID_PREFIX = "client_tool::"
 
 
 def _check_tool_id(id: str) -> str:
@@ -81,6 +82,15 @@ def _check_tool_id(id: str) -> str:
             )
         return id
 
+    # Client tools: client_tool::<tool_name>
+    if id.startswith(CLIENT_TOOL_ID_PREFIX):
+        tool_name = client_tool_name_from_id(id)
+        if not tool_name:
+            raise ValueError(
+                f"Invalid client tool ID: {id}. Expected format: 'client_tool::<tool_name>'."
+            )
+        return id
+
     raise ValueError(f"Invalid tool ID: {id}")
 
 
@@ -129,6 +139,20 @@ def build_kiln_task_tool_id(server_id: ID_TYPE) -> str:
     return f"{KILN_TASK_TOOL_ID_PREFIX}{server_id}"
 
 
+def client_tool_name_from_id(tool_id: str) -> str:
+    """Return the tool name embedded in a client tool ID.
+
+    Raises ValueError when the prefix is missing or the name is blank.
+    """
+    head, _, tool_name = tool_id.partition(CLIENT_TOOL_ID_PREFIX)
+    # A non-empty ``head`` means the ID does not start with the prefix.
+    if head or not tool_name.strip():
+        raise ValueError(
+            f"Invalid client tool ID format: {tool_id}. Expected format: 'client_tool::<tool_name>'."
+        )
+    return tool_name
+
+
 def kiln_task_server_id_from_tool_id(tool_id: str) -> str:
     """
     Get the server ID from the tool ID.
diff --git a/libs/core/kiln_ai/tools/client_tool.py b/libs/core/kiln_ai/tools/client_tool.py
new file mode 100644
index 000000000..7ccb39653
--- /dev/null
+++ b/libs/core/kiln_ai/tools/client_tool.py
@@ -0,0 +1,113 @@
+from typing import Any, Dict
+
+from kiln_ai.datamodel.tool_id import (
+    ToolId,
+    client_tool_name_from_id,
+)
+from kiln_ai.tools.base_tool import (
+    KilnToolInterface,
+    ToolCallContext,
+    ToolCallDefinition,
+    ToolCallResult,
+)
+
+
+class ClientToolCallRequired(Exception):
+    """Signal that a tool call must be executed on the client side.
+
+    The remote backend catches this and emits a client-tool-call SSE event
+    from the stored tool_call_id, tool_name and arguments.
+    """
+
+    def __init__(self, tool_call_id: str, tool_name: str, arguments: dict[str, Any]):
+        message = f"Client tool '{tool_name}' requires client-side execution"
+        super().__init__(message)
+        self.tool_call_id, self.tool_name = tool_call_id, tool_name
+        self.arguments = arguments
+
+
+class ClientToolPlaceholder(KilnToolInterface):
+    """Schema-only stand-in for a tool that must execute on the client.
+
+    Exposes an ID, name, description and function-calling definition so the
+    tool can be offered to the model, but ``run`` never returns a result: it
+    always raises ClientToolCallRequired so execution can be handed off to
+    the client (e.g., for tools that read local files).
+    """
+
+    def __init__(
+        self,
+        tool_id: str,
+        name: str,
+        description: str,
+        parameters_schema: Dict[str, Any],
+    ):
+        # Stored verbatim; the async accessors below just echo them back.
+        self._tool_id, self._name = tool_id, name
+        self._description = description
+        self._parameters_schema = parameters_schema
+
+    async def id(self) -> ToolId:
+        return self._tool_id
+
+    async def name(self) -> str:
+        return self._name
+
+    async def description(self) -> str:
+        return self._description
+
+    async def toolcall_definition(self) -> ToolCallDefinition:
+        return {
+            "type": "function",
+            "function": {
+                "name": self._name,
+                "description": self._description,
+                "parameters": self._parameters_schema,
+            },
+        }
+
+    async def run(
+        self, context: ToolCallContext | None = None, **kwargs: Any
+    ) -> ToolCallResult:
+        # No result is ever produced here. The tool call ID is unknown at
+        # this point, so the exception is raised with an empty one.
+        raise ClientToolCallRequired(
+            tool_call_id="", tool_name=self._name, arguments=dict(kwargs)
+        )
+
+
+# Registry of known client tools, keyed by tool name (the tool ID without its
+# prefix). Each entry supplies the "description" and "parameters_schema" used
+# to build a ClientToolPlaceholder; the tools themselves run on the client.
+_CLIENT_TOOL_DEFINITIONS: Dict[str, Dict[str, Any]] = {
+    "read_task_run": {
+        "description": "Read a task run from the user's local Kiln project by its ID. Returns the task run data as JSON.",
+        "parameters_schema": {
+            "type": "object",
+            "properties": {
+                "task_run_id": {
+                    "type": "string",
+                    "description": "The numeric ID of the task run to read",
+                },
+            },
+            "required": ["task_run_id"],
+        },
+    },
+}
+
+
+def client_tool_from_id(tool_id: str) -> ClientToolPlaceholder:
+    """Build the placeholder for a known client tool from its tool ID.
+
+    Raises ValueError if the ID is malformed or the tool is not registered.
+    """
+    tool_name = client_tool_name_from_id(tool_id)
+    if tool_name not in _CLIENT_TOOL_DEFINITIONS:
+        known_tools = list(_CLIENT_TOOL_DEFINITIONS.keys())
+        raise ValueError(
+            f"Unknown client tool: {tool_name}. Known client tools: {known_tools}"
+        )
+    spec = _CLIENT_TOOL_DEFINITIONS[tool_name]
+    return ClientToolPlaceholder(
+        tool_id, tool_name, spec["description"], spec["parameters_schema"]
+    )
diff --git a/libs/core/kiln_ai/tools/test_client_tool.py b/libs/core/kiln_ai/tools/test_client_tool.py
new file mode 100644
index 000000000..315cd8fc5
--- /dev/null
+++ b/libs/core/kiln_ai/tools/test_client_tool.py
@@ -0,0 +1,90 @@
+import pytest
+
+from kiln_ai.datamodel.tool_id import _check_tool_id, client_tool_name_from_id
+from kiln_ai.tools.client_tool import (
+    ClientToolCallRequired,
+    ClientToolPlaceholder,
+    client_tool_from_id,
+)
+from kiln_ai.tools.tool_registry import tool_from_id
+
+
+class TestClientToolId:
+    def test_valid_client_tool_id(self):
+        valid_id = "client_tool::read_task_run"
+        assert _check_tool_id(valid_id) == valid_id
+
+    def test_invalid_client_tool_id_no_name(self):
+        with pytest.raises(ValueError, match="Invalid client tool ID"):
+            _check_tool_id("client_tool::")
+
+    def test_client_tool_name_from_id(self):
+        tool_name = client_tool_name_from_id("client_tool::read_task_run")
+        assert tool_name == "read_task_run"
+
+    def test_client_tool_name_from_id_invalid_prefix(self):
+        with pytest.raises(ValueError, match="Invalid client tool ID"):
+            client_tool_name_from_id("kiln_tool::add_numbers")
+
+
+class TestClientToolPlaceholder:
+    async def test_provides_schema(self):
+        placeholder = client_tool_from_id("client_tool::read_task_run")
+        definition = await placeholder.toolcall_definition()
+        function_spec = definition["function"]
+        assert definition["type"] == "function"
+        assert function_spec["name"] == "read_task_run"
+        assert "task_run_id" in function_spec["parameters"]["properties"]
+
+    async def test_run_raises_client_tool_call_required(self):
+        placeholder = client_tool_from_id("client_tool::read_task_run")
+        with pytest.raises(ClientToolCallRequired) as exc_info:
+            await placeholder.run(task_run_id="12345")
+        raised = exc_info.value  # carries the tool name and call arguments
+        assert raised.tool_name == "read_task_run"
+        assert raised.arguments["task_run_id"] == "12345"
+
+    async def test_id(self):
+        placeholder = client_tool_from_id("client_tool::read_task_run")
+        assert await placeholder.id() == "client_tool::read_task_run"
+
+    async def test_name(self):
+        placeholder = client_tool_from_id("client_tool::read_task_run")
+        assert await placeholder.name() == "read_task_run"
+
+
+class TestClientToolFromId:
+    def test_unknown_client_tool(self):
+        with pytest.raises(ValueError, match="Unknown client tool"):
+            client_tool_from_id("client_tool::nonexistent")
+
+    def test_known_client_tool(self):
+        known_id = "client_tool::read_task_run"
+        assert isinstance(client_tool_from_id(known_id), ClientToolPlaceholder)
+
+
+class TestToolRegistryIntegration:
+    def test_tool_from_id_resolves_client_tool(self):
+        resolved = tool_from_id("client_tool::read_task_run")
+        assert isinstance(resolved, ClientToolPlaceholder)
+
+
+class TestClientToolCallRequired:
+    """Attribute behavior of the ClientToolCallRequired exception."""
+
+    def test_exception_attributes(self):
+        call_args = {"path": "/test"}
+        exc = ClientToolCallRequired(
+            tool_call_id="tc_123", tool_name="read_task_run", arguments=call_args
+        )
+        assert exc.tool_call_id == "tc_123"
+        assert exc.tool_name == "read_task_run"
+        assert exc.arguments == {"path": "/test"}
+
+    def test_tool_call_id_mutable(self):
+        # The ID starts empty and is overwritten afterwards.
+        exc = ClientToolCallRequired(
+            tool_call_id="", tool_name="read_task_run", arguments={}
+        )
+        exc.tool_call_id = "tc_updated"
+        assert exc.tool_call_id == "tc_updated"
diff --git a/libs/core/kiln_ai/tools/tool_registry.py b/libs/core/kiln_ai/tools/tool_registry.py
index a516dfdd4..da0847445 100644
--- a/libs/core/kiln_ai/tools/tool_registry.py
+++ b/libs/core/kiln_ai/tools/tool_registry.py
@@ -1,6 +1,7 @@
 from kiln_ai.datamodel.rag import RagConfig
 from kiln_ai.datamodel.task import Task
 from kiln_ai.datamodel.tool_id import (
+    CLIENT_TOOL_ID_PREFIX,
     KILN_TASK_TOOL_ID_PREFIX,
     MCP_LOCAL_TOOL_ID_PREFIX,
     MCP_REMOTE_TOOL_ID_PREFIX,
@@ -17,6 +18,7 @@
     MultiplyTool,
     SubtractTool,
 )
+from kiln_ai.tools.client_tool import client_tool_from_id
 from kiln_ai.tools.kiln_task_tool import KilnTaskTool
 from kiln_ai.tools.mcp_server_tool import MCPServerTool
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
@@ -93,6 +95,9 @@ def tool_from_id(tool_id: str, task: Task | None = None) -> KilnToolInterface:
 
             return KilnTaskTool(project.id, tool_id, server)
 
+    elif tool_id.startswith(CLIENT_TOOL_ID_PREFIX):
+        return client_tool_from_id(tool_id)
+
     elif tool_id.startswith(RAG_TOOL_ID_PREFIX):
         project = task.parent_project() if task is not None else None
         if project is None: