Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add basic opentelemetry tracing support #75

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,29 @@ Additions to the [developer toolkit][developer] change the core performance, and

This project follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for PR titles. Conventional Commits make it easier to understand the history of a project and facilitate automation around versioning and changelog generation.

## Running with OpenTelemetry

Goose uses HTTP to access model providers. If you are receiving failures, it can be helpful to see traces
of the underlying HTTP requests. For example, a 404 could be indicative of an incorrect URL or a missing model.

First, ensure you have an OpenTelemetry compatible collector listening on port 4318, such as [otel-tui][otel-tui].

```bash
docker run --rm -it --name otel-tui -p 4318:4318 ymtdzzz/otel-tui
```

Then, start goose like this:
```bash
uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose session start
# or `just otel-goose session start`
```

[issues]: https://github.com/square/goose/issues
[goose-plugins]: https://github.com/square/goose-plugins
[ai-exchange]: https://github.com/square/exchange
[developer]: src/goose/toolkit/developer.py
[uv]: https://docs.astral.sh/uv/
[ruff]: https://docs.astral.sh/ruff/
[just]: https://github.com/casey/just
[toolkits]: docs/docs/toolkits.md
[toolkits]: docs/docs/toolkits.md
[otel-tui]: https://github.com/ymtdzzz/otel-tui
3 changes: 3 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ coverage *FLAGS:

docs:
cd docs && uv sync && uv run mkdocs serve

otel-goose *FLAGS:
uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose {{FLAGS}}
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies = [
"ai-exchange>=0.9.0",
"click>=8.1.7",
"prompt-toolkit>=3.0.47",
"opentelemetry-api>=1.27.0",
]
author = [{ name = "Block", email = "[email protected]" }]
packages = [{ include = "goose", from = "src" }]
Expand Down Expand Up @@ -61,6 +62,11 @@ dev-dependencies = [
"mkdocs-redirects>=1.2.1",
"mkdocs-include-markdown-plugin>=6.2.2",
"mkdocs-callouts>=1.14.0",
"opentelemetry-sdk>=1.27.0",
"opentelemetry-exporter-otlp-proto-http>=1.27.0",
"opentelemetry-distro>=0.48b0",
"opentelemetry-instrumentation-httpx>=0.48b0",
"python-dotenv[cli]>=1.0.1"
codefromthecrypt marked this conversation as resolved.
Show resolved Hide resolved
]


55 changes: 39 additions & 16 deletions src/goose/cli/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from typing import Any, Dict, List, Optional

from exchange import Message, ToolResult, ToolUse, Text
from opentelemetry import trace
from opentelemetry.trace.status import Status as OtelStatus, StatusCode as OtelStatusCode
from prompt_toolkit.shortcuts import confirm
from rich import print
from rich.console import RenderableType
Expand Down Expand Up @@ -90,12 +92,17 @@ def __init__(
name: Optional[str] = None,
profile: Optional[str] = None,
plan: Optional[dict] = None,
tracer: trace.Tracer = trace.get_tracer("goose"),
**kwargs: Dict[str, Any],
) -> None:
self.name = name
self.status_indicator = Status("", spinner="dots")
self.notifier = SessionNotifier(self.status_indicator)

# Set the tracer as a field in session, as opposed to a module variable
# so that tests can swap this out and safely run in parallel.
self.tracer = tracer

self.exchange = build_exchange(profile=load_profile(profile), notifier=self.notifier)

if name is not None and self.session_file_path.exists():
Expand Down Expand Up @@ -150,22 +157,38 @@ def run(self) -> None:
"""
message = self.process_first_message()
while message: # Loop until no input (empty string).
self.notifier.start()
try:
self.exchange.add(message)
self.reply() # Process the user message.
except KeyboardInterrupt:
self.interrupt_reply()
except Exception:
# rewind to right before the last user message
self.exchange.rewind()
print(traceback.format_exc())
print(
"\n[red]The error above was an exception we were not able to handle.\n\n[/]"
+ "These errors are often related to connection or authentication\n"
+ "We've removed the conversation up to the most recent user message"
+ " - [yellow]depending on the error you may be able to continue[/]"
)
span_attributes = {
"goose.role": message.role,
"goose.id": message.id,
}

# For starters, only add to the trace the first text content
first_content = message.content[0] if message.content else None
if isinstance(first_content, Text):
span_attributes["goose.text"] = first_content.text

with self.tracer.start_as_current_span(message.role, attributes=span_attributes) as span:
self.notifier.start()
try:
self.exchange.add(message)
self.reply() # Process the user message.
except KeyboardInterrupt:
# TODO: should we make interrupting an error? If not, how should we mark this?
# span as it was interrupted?
span.set_status(OtelStatus(OtelStatusCode.ERROR, "KeyboardInterrupt"))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting case. I don't know if an explicit interrupt would actually be an error for the user, it's expected to fail - a browser comparison would be closing a tab during a request, it would be common for the request to still complete on the server side and be green. Anyways, it's probably correct to set it as an error though not fully confident

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added a comment as I don't know yet, either, but maybe after some practice someone will.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hey idea.. maybe we set it to ok, but leave the interrupt?

self.interrupt_reply()
except Exception as e:
# rewind to right before the last user message
self.exchange.rewind()
span.set_status(OtelStatus(OtelStatusCode.ERROR, str(e)))
span.record_exception(e)
print(traceback.format_exc())
print(
"\n[red]The error above was an exception we were not able to handle.\n\n[/]"
+ "These errors are often related to connection or authentication\n"
+ "We've removed the conversation up to the most recent user message"
+ " - [yellow]depending on the error you may be able to continue[/]"
)
self.notifier.stop()

print() # Print a newline for separation.
Expand Down
101 changes: 101 additions & 0 deletions tests/cli/test_session_otel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import pytest
from httpx import HTTPStatusError
from unittest.mock import MagicMock, patch
from exchange import Message, Text
from goose.cli.session import Session
from goose.cli.prompt.goose_prompt_session import GoosePromptSession
from goose.cli.prompt.user_input import PromptAction, UserInput
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter


@pytest.fixture
def memory_exporter():
yield InMemorySpanExporter()


@pytest.fixture
def create_session_with_mock_configs(mock_sessions_path, exchange_factory, profile_factory, memory_exporter):
# Create a tracer with the same memory exporter as test assertions use. We do this for each
# test to ensure they can run in parallel without interfering with each other.
tracer_provider = TracerProvider()
memory_exporter = memory_exporter
span_processor = SimpleSpanProcessor(memory_exporter)
tracer_provider.add_span_processor(span_processor)

with patch("goose.cli.session.build_exchange", return_value=exchange_factory()), patch(
"goose.cli.session.load_profile", return_value=profile_factory()
), patch("goose.cli.session.SessionNotifier") as mock_session_notifier, patch(
"goose.cli.session.load_provider", return_value="provider"
):
mock_session_notifier.return_value = MagicMock()

def create_session(session_attributes: dict = {}):
return Session(**session_attributes, tracer=tracer_provider.get_tracer(__name__))

yield create_session


def test_trace_run(create_session_with_mock_configs, memory_exporter):
session = create_session_with_mock_configs()

message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])

# Call the run function, for a single message which results in an exit.
with patch.object(Session, "process_first_message", return_value=message), patch.object(
Session, "reply", return_value=None
), patch.object(
GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
), patch.object(Session, "save_session", return_value=None):
session.run()

spans = memory_exporter.get_finished_spans()
assert len(spans) == 1
span = spans[0]
assert span.name == message.role
assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}


def test_trace_run_interrupted(create_session_with_mock_configs, memory_exporter):
session = create_session_with_mock_configs()

message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])

# Call the run function, for a single message which results in an interrupt.
with patch.object(Session, "process_first_message", return_value=message), patch.object(
Session, "reply", side_effect=KeyboardInterrupt
), patch.object(
GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
), patch.object(Session, "save_session", return_value=None):
session.run()

spans = memory_exporter.get_finished_spans()
assert len(spans) == 1
span = spans[0]
assert span.name == message.role
assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}
assert span.status.is_ok is False
assert span.status.description == "KeyboardInterrupt"


def test_trace_run_error(create_session_with_mock_configs, memory_exporter):
session = create_session_with_mock_configs()

message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")])

# Call the run function, for a single message which results in an HTTP error.
with patch.object(Session, "process_first_message", return_value=message), patch.object(
Session, "reply", side_effect=HTTPStatusError("HTTP error", request=None, response=None)
), patch.object(
GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT)
), patch.object(Session, "save_session", return_value=None):
session.run()

spans = memory_exporter.get_finished_spans()
assert len(spans) == 1
span = spans[0]
assert span.name == message.role
assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text}
assert span.status.is_ok is False
assert span.status.description == "HTTP error"