Skip to content

Commit 1f3f3eb

Browse files
authored
fix(sampling): Attribute backpressure as unsampling reason more accurately (#6445)
### Description #### Context If backpressure handling is active, it lowers the effective sample rate. There are different outcomes for when a span is unsampled due to `sample_rate` or `backpressure`. #### Behavior Before this change, if under backpressure, we'd only report `backpressure` as the reason for unsampling if the monitor lowered the effective sample rate to a falsy value. Which only covers a small portion of unsample decisions made due to backpressure. The rest would be reported as a generic `sample_rate` drop, potentially leading to confusing reporting. If someone e.g. has a `traces_sample_rate` of `1.0`, they might see spans being dropped because of `sample_rate`, which is hard to justify. In this PR, we try to do a better job at determining whether backpressure was actually the reason a span was dropped and emit the corresponding outcome. The idea is, for an unsampled span: - if `sample_rand` would've been higher than the original, non-backpressure `sample_rate`, the span would've been unsampled regardless of backpressure handling, so the outcome should be `sample_rate` - if `sample_rand` actually would've been lower than the original `sample_rate` had the `sample_rate` not been lowered, the span would've been sampled. In that case backpressure handling is the direct cause and the outcome should be `backpressure` #### Issues <!-- * resolves: #1234 * resolves: LIN-1234 --> #### Reminders - Please add tests to validate your changes, and lint your code using `uv run ruff`. - Add GH Issue ID _&_ Linear ID (if applicable) - PR title should use [conventional commit](https://develop.sentry.dev/engineering-practices/commit-messages/#type) style (`feat:`, `fix:`, `ref:`, `meta:`) - For external contributors: [CONTRIBUTING.md](https://github.com/getsentry/sentry-python/blob/master/CONTRIBUTING.md), [Sentry SDK development docs](https://develop.sentry.dev/sdk/), [Discord community](https://discord.gg/Ww9hbqr)
1 parent 2ce26d1 commit 1f3f3eb

2 files changed

Lines changed: 83 additions & 4 deletions

File tree

sentry_sdk/tracing_utils.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,23 +1577,39 @@ def _make_sampling_decision(
15771577
return False, 0.0, None, "sample_rate"
15781578

15791579
# Adjust sample rate if we're under backpressure
1580+
sample_rate_before_backpressure = sample_rate
15801581
if client.monitor:
15811582
sample_rate /= 2**client.monitor.downsample_factor
15821583

15831584
if not sample_rate:
15841585
logger.debug(f"[Tracing] Discarding {name} because backpressure")
15851586
return False, 0.0, None, "backpressure"
15861587

1588+
# Make the actual decision
15871589
sampled = sample_rand < sample_rate
15881590

15891591
if sampled:
15901592
logger.debug(f"[Tracing] Starting {name}")
15911593
outcome = None
1594+
15921595
else:
1593-
logger.debug(
1594-
f"[Tracing] Discarding {name} because it's not included in the random sample (sampling rate = {sample_rate})"
1595-
)
1596-
outcome = "sample_rate"
1596+
# Determine why exactly the span will not be sampled. If we've lowered
1597+
# the effective sample_rate because of backpressure, check whether the
1598+
# span would've been sampled if backpressure wasn't active. If that's the
1599+
# case, backpressure is the actual reason, otherwise just pure sampling
1600+
# rate.
1601+
if (
1602+
sample_rate_before_backpressure != sample_rate
1603+
and sample_rand < sample_rate_before_backpressure
1604+
):
1605+
logger.debug(f"[Tracing] Discarding {name} because backpressure")
1606+
outcome = "backpressure"
1607+
1608+
else:
1609+
logger.debug(
1610+
f"[Tracing] Discarding {name} because it's not included in the random sample (sampling rate = {sample_rate})"
1611+
)
1612+
outcome = "sample_rate"
15971613

15981614
return sampled, sample_rate, sample_rand, outcome
15991615

tests/tracing/test_span_streaming.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,69 @@ def test_continue_trace_unsampled(sentry_init, capture_items):
754754
assert len(spans) == 0
755755

756756

757+
@pytest.mark.parametrize(
758+
("sample_rand", "expected_sampled", "expected_outcome"),
759+
[
760+
("0.100000", True, None),
761+
("0.250000", False, "backpressure"),
762+
("0.300000", False, "backpressure"),
763+
("0.500000", False, "sample_rate"),
764+
("0.700000", False, "sample_rate"),
765+
],
766+
)
767+
def test_backpressure_outcome(
768+
sentry_init,
769+
capture_items,
770+
capture_record_lost_event_calls,
771+
sample_rand,
772+
expected_sampled,
773+
expected_outcome,
774+
):
775+
sentry_init(
776+
traces_sample_rate=0.5,
777+
enable_backpressure_handling=True,
778+
_experiments={"trace_lifecycle": "stream"},
779+
)
780+
781+
items = capture_items("span")
782+
record_lost_event_calls = capture_record_lost_event_calls()
783+
784+
client = sentry_sdk.get_client()
785+
client.monitor._downsample_factor = 1
786+
787+
trace_id = "0af7651916cd43dd8448eb211c80319c"
788+
parent_span_id = "b7ad6b7169203331"
789+
790+
sentry_sdk.traces.continue_trace(
791+
{
792+
"sentry-trace": f"{trace_id}-{parent_span_id}",
793+
"baggage": f"sentry-trace_id={trace_id},sentry-sample_rand={sample_rand}",
794+
}
795+
)
796+
797+
with sentry_sdk.traces.start_span(name="span") as span:
798+
pass
799+
800+
sentry_sdk.get_client().flush()
801+
spans = [item.payload for item in items]
802+
803+
# Original traces_sample_rate is 0.5, downsampled sample rate is 0.25, so:
804+
# - sample_rand < 0.25 -> sampled
805+
# - 0.25 < sample_rand < 0.5 -> unsampled because of backpressure (would've been sampled if no backpressure)
806+
# - 0.5 < sample_rand -> unsampled because of sampling rate
807+
808+
assert span.sampled is expected_sampled
809+
810+
if expected_sampled:
811+
assert len(spans) == 1
812+
assert not record_lost_event_calls
813+
else:
814+
assert len(spans) == 0
815+
assert record_lost_event_calls == [
816+
(expected_outcome, "span", None, 1),
817+
]
818+
819+
757820
def test_unsampled_spans_produce_client_report(
758821
sentry_init, capture_items, capture_record_lost_event_calls
759822
):

0 commit comments

Comments
 (0)