Skip to content

fix(trace): Handle too many errors #95004

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 38 additions & 19 deletions src/sentry/api/endpoints/organization_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import sentry_sdk
from django.http import HttpRequest, HttpResponse
from rest_framework.exceptions import ParseError
from rest_framework.request import Request
from rest_framework.response import Response
from snuba_sdk import Column, Function
Expand All @@ -27,9 +28,12 @@
from sentry.snuba.referrer import Referrer
from sentry.snuba.spans_rpc import run_trace_query
from sentry.utils.numbers import base32_encode
from sentry.utils.validators import is_event_id

# 1 worker each for spans, errors, performance issues
_query_thread_pool = ThreadPoolExecutor(max_workers=3)
# Mostly here for testing
ERROR_LIMIT = 10_000


class SerializedEvent(TypedDict):
Expand Down Expand Up @@ -204,30 +208,39 @@ def serialize_rpc_event(
else:
return self.serialize_rpc_issue(event, group_cache)

def errors_query(self, snuba_params: SnubaParams, trace_id: str) -> DiscoverQueryBuilder:
def errors_query(
self, snuba_params: SnubaParams, trace_id: str, error_id: str | None
) -> DiscoverQueryBuilder:
"""Run an error query, getting all the errors for a given trace id"""
# TODO: replace this with EAP calls, this query is copied from the old trace view
columns = [
"id",
"project.name",
"project.id",
"timestamp",
"timestamp_ms",
"trace.span",
"transaction",
"issue",
"title",
"message",
"tags[level]",
]
orderby = ["id"]
# If there's an error_id included in the request, bias the orderby to try to return that error_id over others so
# that we can render it in the trace view, even if we hit the error_limit
if error_id is not None:
columns.append(f'to_other(id, "{error_id}", 0, 1) AS target')
orderby.insert(0, "-target")
return DiscoverQueryBuilder(
Dataset.Events,
params={},
snuba_params=snuba_params,
query=f"trace:{trace_id}",
selected_columns=[
"id",
"project.name",
"project.id",
"timestamp",
"timestamp_ms",
"trace.span",
"transaction",
"issue",
"title",
"message",
"tags[level]",
],
selected_columns=columns,
# Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
orderby=["id"],
limit=10_000,
orderby=orderby,
limit=ERROR_LIMIT,
config=QueryBuilderConfig(
auto_fields=True,
),
Expand Down Expand Up @@ -292,7 +305,9 @@ def run_perf_issues_query(self, occurrence_query: DiscoverQueryBuilder):
return result

@sentry_sdk.tracing.trace
def query_trace_data(self, snuba_params: SnubaParams, trace_id: str) -> list[SerializedEvent]:
def query_trace_data(
self, snuba_params: SnubaParams, trace_id: str, error_id: str | None = None
) -> list[SerializedEvent]:
"""Queries span/error data for a given trace"""
# This is a hack, long term EAP will store both errors and performance_issues eventually but is not ready
# currently. But we want to move performance data off the old tables immediately. To keep the code simpler I'm
Expand All @@ -302,7 +317,7 @@ def query_trace_data(self, snuba_params: SnubaParams, trace_id: str) -> list[Ser
# the thread pool, database connections can hang around as the threads are not cleaned
# up. Because of that, tests can fail during tear down as there are active connections
# to the database preventing a DROP.
errors_query = self.errors_query(snuba_params, trace_id)
errors_query = self.errors_query(snuba_params, trace_id, error_id)
occurrence_query = self.perf_issues_query(snuba_params, trace_id)

spans_future = _query_thread_pool.submit(
Expand Down Expand Up @@ -380,10 +395,14 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht

update_snuba_params_with_timestamp(request, snuba_params)

error_id = request.GET.get("errorId")
if error_id is not None and not is_event_id(error_id):
raise ParseError(f"eventId: {error_id} needs to be a valid uuid")

def data_fn(offset: int, limit: int) -> list[SerializedEvent]:
"""offset and limit don't mean anything on this endpoint currently"""
with handle_query_errors():
spans = self.query_trace_data(snuba_params, trace_id)
spans = self.query_trace_data(snuba_params, trace_id, error_id)
return spans

return self.paginate(
Expand Down
35 changes: 35 additions & 0 deletions tests/snuba/api/endpoints/test_organization_trace.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from unittest import mock
from uuid import uuid4

from django.urls import reverse
Expand Down Expand Up @@ -232,3 +233,37 @@ def test_with_only_errors(self):
data = response.data
assert len(data) == 1
assert data[0]["event_id"] == error.event_id

def test_with_target_error(self):
start, _ = self.get_start_end_from_day_ago(1000)
error_data = load_data(
"javascript",
timestamp=start,
)
error_data["contexts"]["trace"] = {
"type": "trace",
"trace_id": self.trace_id,
"span_id": "a" * 16,
}
error_data["tags"] = [["transaction", "/transaction/gen1-0"]]
error = self.store_event(error_data, project_id=self.project.id)
for _ in range(5):
self.store_event(error_data, project_id=self.project.id)

with mock.patch("sentry.api.endpoints.organization_trace.ERROR_LIMIT", 1):
with self.feature(self.FEATURES):
response = self.client_get(
data={"timestamp": self.day_ago, "errorId": error.event_id},
)
assert response.status_code == 200, response.content
data = response.data
assert len(data) == 1
assert data[0]["event_id"] == error.event_id

def test_with_invalid_error_id(self):
with self.feature(self.FEATURES):
response = self.client_get(
data={"timestamp": self.day_ago, "errorId": ",blah blah,"},
)

assert response.status_code == 400, response.content
Loading