square · codefromthecrypt · Sep 18, 2024 · Sep 18, 2024 · Sep 19, 2024 · Sep 19, 2024
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -108,11 +108,29 @@ Additions to the [developer toolkit][developer] change the core performance, and
 
 This project follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for PR titles. Conventional Commits make it easier to understand the history of a project and facilitate automation around versioning and changelog generation.
 
+## Running with OpenTelemetry
+
+Goose uses HTTP to access model providers. If you are receiving failures, it can be helpful to see traces
+of the underlying HTTP requests. For example, a 404 could be indicative of an incorrect URL or a missing model.
+
+First, ensure you have an OpenTelemetry compatible collector listening on port 4318, such as [otel-tui][otel-tui].
+
+```bash
+docker run --rm -it --name otel-tui -p 4318:4318 ymtdzzz/otel-tui
+```
+
+Then, start goose like this:
+```bash
+uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose session start
+# or `just otel-goose session start`
+```
+
 [issues]: https://github.com/square/goose/issues
 [goose-plugins]: https://github.com/square/goose-plugins
 [ai-exchange]: https://github.com/square/exchange
 [developer]: src/goose/toolkit/developer.py
 [uv]: https://docs.astral.sh/uv/
 [ruff]: https://docs.astral.sh/ruff/
 [just]: https://github.com/casey/just
-[toolkits]: docs/docs/toolkits.md
+[toolkits]: docs/docs/toolkits.md
+[otel-tui]: https://github.com/ymtdzzz/otel-tui
diff --git a/justfile b/justfile
@@ -20,3 +20,6 @@ coverage *FLAGS:
 
 docs:
   cd docs && uv sync && uv run mkdocs serve
+
+otel-goose *FLAGS:
+  uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose {{FLAGS}}
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "ai-exchange>=0.9.0",
     "click>=8.1.7",
     "prompt-toolkit>=3.0.47",
+    "opentelemetry-api>=1.27.0",
 ]
 author = [{ name = "Block", email = "[email protected]" }]
 packages = [{ include = "goose", from = "src" }]
@@ -61,6 +62,11 @@ dev-dependencies = [
     "mkdocs-redirects>=1.2.1",
     "mkdocs-include-markdown-plugin>=6.2.2",
     "mkdocs-callouts>=1.14.0",
+    "opentelemetry-sdk>=1.27.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.27.0",
+    "opentelemetry-distro>=0.48b0",
+    "opentelemetry-instrumentation-httpx>=0.48b0",
+    "python-dotenv[cli]>=1.0.1"
 ]
 
 
diff --git a/src/goose/cli/session.py b/src/goose/cli/session.py
@@ -3,6 +3,8 @@
 from typing import Any, Dict, List, Optional
 
 from exchange import Message, ToolResult, ToolUse, Text
+from opentelemetry import trace
+from opentelemetry.trace.status import Status as OtelStatus, StatusCode as OtelStatusCode
 from prompt_toolkit.shortcuts import confirm
 from rich import print
 from rich.console import RenderableType
@@ -90,12 +92,17 @@ def __init__(
         name: Optional[str] = None,
         profile: Optional[str] = None,
         plan: Optional[dict] = None,
+        tracer: trace.Tracer = trace.get_tracer("goose"),
         **kwargs: Dict[str, Any],
     ) -> None:
         self.name = name
         self.status_indicator = Status("", spinner="dots")
         self.notifier = SessionNotifier(self.status_indicator)
 
+        # Set the tracer as a field in session, as opposed to a module variable
+        # so that tests can swap this out and safely run in parallel.
+        self.tracer = tracer
+
         self.exchange = build_exchange(profile=load_profile(profile), notifier=self.notifier)
 
         if name is not None and self.session_file_path.exists():
@@ -150,22 +157,38 @@ def run(self) -> None:
         """
         message = self.process_first_message()
         while message:  # Loop until no input (empty string).
-            self.notifier.start()
-            try:
-                self.exchange.add(message)
-                self.reply()  # Process the user message.
-            except KeyboardInterrupt:
-                self.interrupt_reply()
-            except Exception:
-                # rewind to right before the last user message
-                self.exchange.rewind()
-                print(traceback.format_exc())
-                print(
-                    "\n[red]The error above was an exception we were not able to handle.\n\n[/]"
-                    + "These errors are often related to connection or authentication\n"
-                    + "We've removed the conversation up to the most recent user message"
-                    + " - [yellow]depending on the error you may be able to continue[/]"
-                )
+            span_attributes = {
+                "goose.role": message.role,
+                "goose.id": message.id,
+            }
+
+            # For starters, only add to the trace the first text content
+            first_content = message.content[0] if message.content else None
+            if isinstance(first_content, Text):
+                span_attributes["goose.text"] = first_content.text
+
+            with self.tracer.start_as_current_span(message.role, attributes=span_attributes) as span:
+                self.notifier.start()
+                try:
+                    self.exchange.add(message)
+                    self.reply()  # Process the user message.
+                except KeyboardInterrupt:
+                    # TODO: should we make interrupting an error? If not, how should we mark this?
+                    # span as it was interrupted?
+                    span.set_status(OtelStatus(OtelStatusCode.ERROR, "KeyboardInterrupt"))
+                    self.interrupt_reply()
+                except Exception as e:
+                    # rewind to right before the last user message
+                    self.exchange.rewind()
+                    span.set_status(OtelStatus(OtelStatusCode.ERROR, str(e)))
+                    span.record_exception(e)
+                    print(traceback.format_exc())
+                    print(
+                        "\n[red]The error above was an exception we were not able to handle.\n\n[/]"
+                        + "These errors are often related to connection or authentication\n"
+                        + "We've removed the conversation up to the most recent user message"
+                        + " - [yellow]depending on the error you may be able to continue[/]"
+                    )
             self.notifier.stop()
 
             print()  # Print a newline for separation.

diff --git a/tests/cli/test_session_otel.py b/tests/cli/test_session_otel.py
@@ -0,0 +1,101 @@
+import pytest
+from httpx import HTTPStatusError
+from unittest.mock import MagicMock, patch
+from exchange import Message, Text
+from goose.cli.session import Session
+from goose.cli.prompt.goose_prompt_session import GoosePromptSession
+from goose.cli.prompt.user_input import PromptAction, UserInput
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+
+
+@pytest.fixture
+def memory_exporter():
+    yield InMemorySpanExporter()
+
+
+@pytest.fixture
+def create_session_with_mock_configs(mock_sessions_path, exchange_factory, profile_factory, memory_exporter):
+    # Create a tracer with the same memory exporter as test assertions use. We do this for each
+    # test to ensure they can run in parallel without interfering with each other.
+    tracer_provider = TracerProvider()
+    memory_exporter = memory_exporter
+    span_processor = SimpleSpanProcessor(memory_exporter)
+    tracer_provider.add_span_processor(span_processor)
+
+    with patch("goose.cli.session.build_exchange", return_value=exchange_factory()), patch(
+        "goose.cli.session.load_profile", return_value=profile_factory()
+    ), patch("goose.cli.session.SessionNotifier") as mock_session_notifier, patch(
+        "goose.cli.session.load_provider", return_value="provider"
+    ):
+        mock_session_notifier.return_value = MagicMock()
+
+        def create_session(session_attributes: dict = {}):
+            return Session(**session_attributes, tracer=tracer_provider.get_tracer(__name__))
+
+        yield create_session
+
+
+def test_trace_run(create_session_with_mock_configs, memory_exporter):
+    session = create_session_with_mock_configs()
+
+    message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])
+
+    # Call the run function, for a single message which results in an exit.
+    with patch.object(Session, "process_first_message", return_value=message), patch.object(
+        Session, "reply", return_value=None
+    ), patch.object(
+        GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
+    ), patch.object(Session, "save_session", return_value=None):
+        session.run()
+
+    spans = memory_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == message.role
+    assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}
+
+
+def test_trace_run_interrupted(create_session_with_mock_configs, memory_exporter):
+    session = create_session_with_mock_configs()
+
+    message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])
+
+    # Call the run function, for a single message which results in an interrupt.
+    with patch.object(Session, "process_first_message", return_value=message), patch.object(
+        Session, "reply", side_effect=KeyboardInterrupt
+    ), patch.object(
+        GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
+    ), patch.object(Session, "save_session", return_value=None):
+        session.run()
+
+    spans = memory_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == message.role
+    assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}
+    assert span.status.is_ok is False
+    assert span.status.description == "KeyboardInterrupt"
+
+
+def test_trace_run_error(create_session_with_mock_configs, memory_exporter):
+    session = create_session_with_mock_configs()
+
+    message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])
+
+    # Call the run function, for a single message which results in an HTTP error.
+    with patch.object(Session, "process_first_message", return_value=message), patch.object(
+        Session, "reply", side_effect=HTTPStatusError("HTTP error", request=None, response=None)
+    ), patch.object(
+        GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
+    ), patch.object(Session, "save_session", return_value=None):
+        session.run()
+
+    spans = memory_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == message.role
+    assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}
+    assert span.status.is_ok is False
+    assert span.status.description == "HTTP error"