From 50c773d67f3b53948e3d8cf139c3a9271c1fb5d8 Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 10:59:48 -0700 Subject: [PATCH 1/9] initial functions for filtering --- src/sentry/seer/math.py | 159 +++++++++++++++++++++++++++ src/sentry/seer/workflows/compare.py | 98 ++++++++++++++++- 2 files changed, 256 insertions(+), 1 deletion(-) diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index 85b21b00c38e9c..4a1c10a2fae588 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -86,3 +86,162 @@ def _rrf(kl_rank: int, entropy_rank: int) -> float: def rank_min(xs: list[float], ascending: bool = False): ranks = {x: rank for rank, x in enumerate(sorted(set(xs), reverse=not ascending), 1)} return [ranks[x] for x in xs] + + +def boxcox_transform( + values: list[float], lambda_param: float | None = None +) -> tuple[list[float], float]: + """ + Apply BoxCox transformation to a list of values. + + Parameters: + values: List of positive values to transform + lambda_param: BoxCox lambda parameter. If None, finds optimal lambda. + + Returns: + Tuple of (transformed values, lambda parameter used) + """ + + if lambda_param is not None: + if lambda_param == 0.0: + transformed = [math.log(max(v, 1e-10)) for v in values] + else: + transformed = [(pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in values] + return transformed, lambda_param + + # Find optimal lambda using MLE + optimal_lambda = boxcox_normmax(values) + + if optimal_lambda == 0.0: + transformed = [math.log(max(v, 1e-10)) for v in values] + else: + transformed = [(pow(max(v, 1e-10), optimal_lambda) - 1) / optimal_lambda for v in values] + + return transformed, optimal_lambda + + +def boxcox_llf(lambda_param: float, values: list[float]) -> float: + """ + Compute the Box-Cox log-likelihood function. + + Parameters: + lambda_param: BoxCox lambda parameter + values: List of positive values + + Returns: + Log-likelihood value + """ + n = len(values) + if n == 0: + return 0.0 + + # Transform the data + if lambda_param == 0.0: + y = [math.log(max(v, 1e-10)) for v in values] + else: + y = [(pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in values] + + # Calculate mean and sum of squares + y_mean = sum(y) / n + sum_sq = sum((yi - y_mean) ** 2 for yi in y) + + # Log-likelihood calculation + # llf = (lambda - 1) * sum(log(x)) - n/2 * log(sum_sq) + log_sum = sum(math.log(max(v, 1e-10)) for v in values) + llf = (lambda_param - 1) * log_sum - (n / 2) * math.log(max(sum_sq, 1e-10)) + + return llf + + +def boxcox_normmax(values: list[float]) -> float: + """ + Calculate the approximate optimal lambda parameter for BoxCox transformation that maximizes the log-likelihood. + + Uses MLE method with ternary search rather than Brent's methodfor efficient optimization. + + Parameters: + values: List of positive values + + Returns: + Approximate optimal lambda parameter + """ + if not values: + return 0.0 + + if any(v <= 0 for v in values): + raise ValueError("All values must be positive for BoxCox transformation") + + left = -2.0 + right = 2.0 + tolerance = 1e-6 + max_iters = 50 + iters = 0 + + while right - left > tolerance and iters < max_iters: + m1 = left + (right - left) / 3 + m2 = right - (right - left) / 3 + + llf_m1 = boxcox_llf(m1, values) + llf_m2 = boxcox_llf(m2, values) + + if llf_m1 > llf_m2: + right = m2 + else: + left = m1 + + iters += 1 + + return (left + right) / 2 + + +def calculate_z_scores(values: list[float]) -> list[float]: + """ + Calculate z-scores for a list of values. 
+ + Parameters: + values: List of numerical values + + Returns: + List of z-scores corresponding to input values + """ + if not values: + return [] + + mean_val = sum(values) / len(values) + variance = sum((x - mean_val) ** 2 for x in values) / len(values) + std_dev = math.sqrt(variance) + + if std_dev == 0: + return [0.0] * len(values) + + return [(x - mean_val) / std_dev for x in values] + + +def filter_by_z_score_threshold( + values: list[float], z_threshold: float = 1.5, lambda_param: float = 0.0 +) -> list[int]: + """ + Get indices of values that pass BoxCox + z-score filtering. + + This function applies BoxCox normalization to the values, + calculates z-scores, and returns indices where z-scores >= threshold. + + Parameters: + values: List of numerical values to filter + z_threshold: Minimum z-score threshold for inclusion + lambda_param: BoxCox lambda parameter (0 for log transformation) + + Returns: + List of indices that pass the filtering criteria + """ + if not values: + return [] + + # Apply BoxCox transformation - unpack the tuple to get just the transformed values + transformed_values, _ = boxcox_transform(values, lambda_param) + + # Calculate z-scores on transformed data + z_scores = calculate_z_scores(transformed_values) + + # Return indices that meet the threshold + return [i for i, z_score in enumerate(z_scores) if z_score >= z_threshold] diff --git a/src/sentry/seer/workflows/compare.py b/src/sentry/seer/workflows/compare.py index d90bf7f9d9564e..153305663f32f1 100644 --- a/src/sentry/seer/workflows/compare.py +++ b/src/sentry/seer/workflows/compare.py @@ -2,7 +2,13 @@ from collections.abc import Callable, Generator, Mapping, Sequence from typing import TypeVar -from sentry.seer.math import entropy, kl_divergence, laplace_smooth, rrf_score +from sentry.seer.math import ( + entropy, + filter_by_z_score_threshold, + kl_divergence, + laplace_smooth, + rrf_score, +) T = TypeVar("T") @@ -13,6 +19,36 @@ Score = tuple[str, float] +def filter_by_z_score( + data: Sequence[KeyedValueCount], z_threshold: float = 1.5, lambda_param: float = 0.0 +) -> list[KeyedValueCount]: + """ + Filter data by applying BoxCox transformation and z-score filtering. + + This function applies BoxCox normalization to the count values in the data, + calculates z-scores, and filters to keep only items with z-scores >= threshold. 
+ + Parameters: + data: Sequence of (key, value, count) tuples + z_threshold: Minimum z-score threshold for inclusion + lambda_param: BoxCox lambda parameter (0 for log transformation) + + Returns: + Filtered list of (key, value, count) tuples + """ + if not data: + return [] + + # Extract counts (the third element of each tuple) + counts = [count for _, _, count in data] + + # Get indices that pass the filtering criteria + passing_indices = filter_by_z_score_threshold(counts, z_threshold, lambda_param) + + # Filter data based on passing indices + return [data[i] for i in passing_indices] + + def keyed_kl_score( baseline: Sequence[KeyedValueCount], outliers: Sequence[KeyedValueCount], @@ -186,3 +222,63 @@ def _ensure_symmetry(a: Distribution, b: Distribution) -> tuple[Distribution, Di def _smooth_distribution(dist: Distribution) -> Distribution: return dict(zip(dist.keys(), laplace_smooth(list(dist.values())))) + + +def keyed_rrf_score_with_filtering( + baseline: Sequence[KeyedValueCount], + outliers: Sequence[KeyedValueCount], + total_baseline: int, + total_outliers: int, + entropy_alpha: float = 0.2, + kl_alpha: float = 0.8, + offset: int = 60, + apply_filtering: bool = True, + z_threshold: float = 1.5, + lambda_param: float = 0.0, + filter_baseline: bool = False, + filter_outliers: bool = True, +) -> tuple[list[tuple[str, float]], list[KeyedValueCount], list[KeyedValueCount]]: + """ + RRF score a multi-dimensional distribution with optional BoxCox + z-score filtering. + + This function demonstrates how to apply filtering as an independent step before RRF scoring. + + Parameters: + baseline: Baseline distribution data + outliers: Outliers distribution data + total_baseline: Total count for baseline + total_outliers: Total count for outliers + entropy_alpha: Weight for entropy in RRF + kl_alpha: Weight for KL divergence in RRF + offset: RRF offset parameter + apply_filtering: Whether to apply BoxCox + z-score filtering + z_threshold: Z-score threshold for filtering + lambda_param: BoxCox lambda parameter + filter_baseline: Whether to filter baseline data + filter_outliers: Whether to filter outliers data + + Returns: + Tuple of (scores, filtered_baseline, filtered_outliers) + This allows you to inspect the intermediary filtering results + """ + filtered_baseline = list(baseline) + filtered_outliers = list(outliers) + + if apply_filtering: + if filter_baseline: + filtered_baseline = filter_by_z_score(baseline, z_threshold, lambda_param) + if filter_outliers: + filtered_outliers = filter_by_z_score(outliers, z_threshold, lambda_param) + + # Apply RRF scoring to the filtered data + scores = keyed_rrf_score( + filtered_baseline, + filtered_outliers, + total_baseline, + total_outliers, + entropy_alpha, + kl_alpha, + offset, + ) + + return scores, filtered_baseline, filtered_outliers From 07ad2f424fda28099343195086cd1405cb14be9d Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 11:45:42 -0700 Subject: [PATCH 2/9] make more consistent with scipy implementation --- src/sentry/seer/math.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index 4a1c10a2fae588..932152b4103527 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -124,6 +124,8 @@ def boxcox_llf(lambda_param: float, values: list[float]) -> float: """ Compute the Box-Cox log-likelihood function. + Uses numerically stable log-space arithmetic following scipy's implementation. 
+ Parameters: lambda_param: BoxCox lambda parameter values: List of positive values @@ -135,25 +137,28 @@ def boxcox_llf(lambda_param: float, values: list[float]) -> float: if n == 0: return 0.0 - # Transform the data + log_values = [math.log(max(v, 1e-10)) for v in values] + log_sum = sum(log_values) + if lambda_param == 0.0: - y = [math.log(max(v, 1e-10)) for v in values] + log_mean = log_sum / n + log_var = sum((lv - log_mean) ** 2 for lv in log_values) / n + logvar = math.log(max(log_var, 1e-10)) else: - y = [(pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in values] - - # Calculate mean and sum of squares - y_mean = sum(y) / n - sum_sq = sum((yi - y_mean) ** 2 for yi in y) - - # Log-likelihood calculation - # llf = (lambda - 1) * sum(log(x)) - n/2 * log(sum_sq) - log_sum = sum(math.log(max(v, 1e-10)) for v in values) - llf = (lambda_param - 1) * log_sum - (n / 2) * math.log(max(sum_sq, 1e-10)) + # For λ≠0: Use log-space arithmetic for numerical stability + # This avoids computing (x^λ - 1)/λ directly which can overflow + # Uses identity: var((x^λ - 1)/λ) = var(x^λ)/λ² + logx = [lambda_param * lv for lv in log_values] # log(x^λ) = λ*log(x) + logx_mean = sum(logx) / n + logx_var = sum((lx - logx_mean) ** 2 for lx in logx) / n + # log(var(y)) = log(var(x^λ)) - 2*log(|λ|) + logvar = math.log(max(logx_var, 1e-10)) - 2 * math.log(abs(lambda_param)) - return llf + # Box-Cox log-likelihood: (λ-1)*Σlog(x) - n/2*log(var(y)) + return (lambda_param - 1) * log_sum - (n / 2) * logvar -def boxcox_normmax(values: list[float]) -> float: +def boxcox_normmax(values: list[float], max_iters: int = 100) -> float: """ Calculate the approximate optimal lambda parameter for BoxCox transformation that maximizes the log-likelihood. @@ -161,6 +166,7 @@ def boxcox_normmax(values: list[float]) -> float: Parameters: values: List of positive values + max_iters: Maximum number of iterations to run for ternary search Returns: Approximate optimal lambda parameter @@ -174,7 +180,6 @@ def boxcox_normmax(values: list[float]) -> float: left = -2.0 right = 2.0 tolerance = 1e-6 - max_iters = 50 iters = 0 while right - left > tolerance and iters < max_iters: From d26e4b373091b1e38ec1b6bbd085b1bc1116106c Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 15:18:43 -0700 Subject: [PATCH 3/9] update return --- .../organization_group_suspect_flags.py | 2 + src/sentry/seer/math.py | 8 +- src/sentry/seer/workflows/compare.py | 118 +++++++++++------- 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/src/sentry/issues/endpoints/organization_group_suspect_flags.py b/src/sentry/issues/endpoints/organization_group_suspect_flags.py index f1745934dbd67e..a193c88cdff43f 100644 --- a/src/sentry/issues/endpoints/organization_group_suspect_flags.py +++ b/src/sentry/issues/endpoints/organization_group_suspect_flags.py @@ -21,6 +21,7 @@ class ResponseDataItem(TypedDict): score: float baseline_percent: float distribution: Distribution + is_filtered: bool class ResponseData(TypedDict): @@ -78,6 +79,7 @@ def get(self, request: Request, group: Group) -> Response: "flag": item["flag"], "score": item["score"], "issue_id": group.id, + "is_filtered": item["is_filtered"], }, ) diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index 932152b4103527..b514d7814bbeee 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -223,7 +223,7 @@ def calculate_z_scores(values: list[float]) -> list[float]: def filter_by_z_score_threshold( - values: list[float], z_threshold: float = 1.5, lambda_param: 
float = 0.0 + values: list[float], z_threshold: float = 1.5, lambda_param: float | None = None ) -> list[int]: """ Get indices of values that pass BoxCox + z-score filtering. @@ -234,7 +234,7 @@ def filter_by_z_score_threshold( Parameters: values: List of numerical values to filter z_threshold: Minimum z-score threshold for inclusion - lambda_param: BoxCox lambda parameter (0 for log transformation) + lambda_param: BoxCox lambda parameter (None for automatic selection) Returns: List of indices that pass the filtering criteria @@ -242,11 +242,7 @@ def filter_by_z_score_threshold( if not values: return [] - # Apply BoxCox transformation - unpack the tuple to get just the transformed values transformed_values, _ = boxcox_transform(values, lambda_param) - - # Calculate z-scores on transformed data z_scores = calculate_z_scores(transformed_values) - # Return indices that meet the threshold return [i for i, z_score in enumerate(z_scores) if z_score >= z_threshold] diff --git a/src/sentry/seer/workflows/compare.py b/src/sentry/seer/workflows/compare.py index 153305663f32f1..9f7e774397f50d 100644 --- a/src/sentry/seer/workflows/compare.py +++ b/src/sentry/seer/workflows/compare.py @@ -3,6 +3,7 @@ from typing import TypeVar from sentry.seer.math import ( + boxcox_transform, entropy, filter_by_z_score_threshold, kl_divergence, @@ -20,7 +21,7 @@ def filter_by_z_score( - data: Sequence[KeyedValueCount], z_threshold: float = 1.5, lambda_param: float = 0.0 + data: Sequence[KeyedValueCount], z_threshold: float = 1.5, lambda_param: float | None = None ) -> list[KeyedValueCount]: """ Filter data by applying BoxCox transformation and z-score filtering. @@ -31,7 +32,7 @@ def filter_by_z_score( Parameters: data: Sequence of (key, value, count) tuples z_threshold: Minimum z-score threshold for inclusion - lambda_param: BoxCox lambda parameter (0 for log transformation) + lambda_param: BoxCox lambda parameter (None for automatic selection) Returns: Filtered list of (key, value, count) tuples @@ -39,13 +40,9 @@ def filter_by_z_score( if not data: return [] - # Extract counts (the third element of each tuple) counts = [count for _, _, count in data] - - # Get indices that pass the filtering criteria passing_indices = filter_by_z_score_threshold(counts, z_threshold, lambda_param) - # Filter data based on passing indices return [data[i] for i in passing_indices] @@ -83,6 +80,8 @@ def keyed_rrf_score( entropy_alpha: float = 0.2, kl_alpha: float = 0.8, offset: int = 60, + filter_rrf: bool = False, + z_threshold: float = 1.5, ) -> list[tuple[str, float]]: """ RRF score a multi-dimensional distribution of values. Returns a list of key, score pairs. 
@@ -112,6 +111,26 @@ def _scoring_fn(baseline: list[float], outliers: list[float]): entropy_scores.append(entropy_score) kl_scores.append(kl_score) + if filter_rrf: + normalized_entropy_scores, _ = boxcox_transform(entropy_scores) + normalized_kl_scores, _ = boxcox_transform(kl_scores) + + filtered_keys = [] + filtered_entropy_scores = [] + filtered_kl_scores = [] + + for i, (key, normalized_entropy_score, normalized_kl_score) in enumerate( + zip(keys, normalized_entropy_scores, normalized_kl_scores) + ): + if normalized_entropy_score > z_threshold or normalized_kl_score > z_threshold: + filtered_keys.append(key) + filtered_entropy_scores.append(entropy_scores[i]) + filtered_kl_scores.append(kl_scores[i]) + + keys = filtered_keys + entropy_scores = filtered_entropy_scores + kl_scores = filtered_kl_scores + return sorted( zip(keys, rrf_score(entropy_scores, kl_scores, entropy_alpha, kl_alpha, offset)), key=lambda k: k[1], @@ -224,7 +243,7 @@ def _smooth_distribution(dist: Distribution) -> Distribution: return dict(zip(dist.keys(), laplace_smooth(list(dist.values())))) -def keyed_rrf_score_with_filtering( +def keyed_rrf_score_with_filter( baseline: Sequence[KeyedValueCount], outliers: Sequence[KeyedValueCount], total_baseline: int, @@ -232,53 +251,56 @@ def keyed_rrf_score_with_filtering( entropy_alpha: float = 0.2, kl_alpha: float = 0.8, offset: int = 60, - apply_filtering: bool = True, z_threshold: float = 1.5, - lambda_param: float = 0.0, - filter_baseline: bool = False, - filter_outliers: bool = True, -) -> tuple[list[tuple[str, float]], list[KeyedValueCount], list[KeyedValueCount]]: +) -> list[tuple[str, float, bool]]: """ - RRF score a multi-dimensional distribution with optional BoxCox + z-score filtering. - - This function demonstrates how to apply filtering as an independent step before RRF scoring. + RRF score a multi-dimensional distribution of values. Returns a list of key, score pairs, and a mapping of if the key was filtered. + Duplicates are not tolerated. 
- Parameters: - baseline: Baseline distribution data - outliers: Outliers distribution data - total_baseline: Total count for baseline - total_outliers: Total count for outliers - entropy_alpha: Weight for entropy in RRF - kl_alpha: Weight for KL divergence in RRF - offset: RRF offset parameter - apply_filtering: Whether to apply BoxCox + z-score filtering - z_threshold: Z-score threshold for filtering - lambda_param: BoxCox lambda parameter - filter_baseline: Whether to filter baseline data - filter_outliers: Whether to filter outliers data + Sample distribution: + [("key", "true", 93), ("key", "false", 219), ("other", "true", 1)] - Returns: - Tuple of (scores, filtered_baseline, filtered_outliers) - This allows you to inspect the intermediary filtering results + Sample output: + [("key", 0.5, True), ("key", 0.3, False), ("other", 0.1, False)] """ - filtered_baseline = list(baseline) - filtered_outliers = list(outliers) - - if apply_filtering: - if filter_baseline: - filtered_baseline = filter_by_z_score(baseline, z_threshold, lambda_param) - if filter_outliers: - filtered_outliers = filter_by_z_score(outliers, z_threshold, lambda_param) - - # Apply RRF scoring to the filtered data - scores = keyed_rrf_score( - filtered_baseline, - filtered_outliers, + + def _scoring_fn(baseline: list[float], outliers: list[float]): + return (entropy(outliers), kl_divergence(baseline, outliers)) + + scored_keys = _score_each_key( + baseline, + outliers, total_baseline, total_outliers, - entropy_alpha, - kl_alpha, - offset, + scoring_fn=_scoring_fn, ) - return scores, filtered_baseline, filtered_outliers + keys = [] + entropy_scores = [] + kl_scores = [] + + for key, (entropy_score, kl_score) in scored_keys: + keys.append(key) + entropy_scores.append(entropy_score) + kl_scores.append(kl_score) + + normalized_entropy_scores, _ = boxcox_transform(entropy_scores) + normalized_kl_scores, _ = boxcox_transform(kl_scores) + + filtered_keys = [False] * len(keys) + + for i, (key, normalized_entropy_score, normalized_kl_score) in enumerate( + zip(keys, normalized_entropy_scores, normalized_kl_scores) + ): + if normalized_entropy_score > z_threshold or normalized_kl_score > z_threshold: + filtered_keys[i] = True + + return sorted( + zip( + keys, + rrf_score(entropy_scores, kl_scores, entropy_alpha, kl_alpha, offset), + filtered_keys, + ), + key=lambda k: k[1], + reverse=True, + ) From 74fc10b98c5cc5e73a64293420ab6d5995780c3e Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 16:00:56 -0700 Subject: [PATCH 4/9] types and tests --- src/sentry/issues/suspect_flags.py | 8 +- src/sentry/seer/math.py | 37 +------ src/sentry/seer/workflows/compare.py | 58 +---------- tests/sentry/seer/test_math.py | 63 ++++++++++++ tests/sentry/seer/workflows/test_compare.py | 108 +++++++++++++++++++- 5 files changed, 182 insertions(+), 92 deletions(-) diff --git a/src/sentry/issues/suspect_flags.py b/src/sentry/issues/suspect_flags.py index 0a456b073e593c..76c3267c05bbe7 100644 --- a/src/sentry/issues/suspect_flags.py +++ b/src/sentry/issues/suspect_flags.py @@ -5,7 +5,7 @@ import sentry_sdk from snuba_sdk import Column, Condition, Entity, Function, Limit, Op, Query, Request -from sentry.seer.workflows.compare import KeyedValueCount, keyed_rrf_score +from sentry.seer.workflows.compare import KeyedValueCount, keyed_rrf_score_with_filter from sentry.utils.snuba import raw_snql_query @@ -19,6 +19,7 @@ class Score(TypedDict): score: float baseline_percent: float distribution: Distribution + is_filtered: bool @sentry_sdk.trace @@ 
-42,7 +43,7 @@ def get_suspect_flag_scores( outliers_count = query_error_counts(org_id, project_id, start, end, envs, group_id=group_id) baseline_count = query_error_counts(org_id, project_id, start, end, envs, group_id=None) - keyed_scores = keyed_rrf_score( + keyed_scores = keyed_rrf_score_with_filter( baseline, outliers, total_baseline=baseline_count, @@ -67,8 +68,9 @@ def get_suspect_flag_scores( "score": score, "baseline_percent": baseline_percent_dict[key], "distribution": distributions[key], + "is_filtered": is_filtered, } - for key, score in keyed_scores + for key, score, is_filtered in keyed_scores ] diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index b514d7814bbeee..794cf04f61ca96 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -109,8 +109,7 @@ def boxcox_transform( transformed = [(pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in values] return transformed, lambda_param - # Find optimal lambda using MLE - optimal_lambda = boxcox_normmax(values) + optimal_lambda = _boxcox_normmax(values) if optimal_lambda == 0.0: transformed = [math.log(max(v, 1e-10)) for v in values] @@ -120,7 +119,7 @@ def boxcox_transform( return transformed, optimal_lambda -def boxcox_llf(lambda_param: float, values: list[float]) -> float: +def _boxcox_llf(lambda_param: float, values: list[float]) -> float: """ Compute the Box-Cox log-likelihood function. @@ -158,7 +157,7 @@ def boxcox_llf(lambda_param: float, values: list[float]) -> float: return (lambda_param - 1) * log_sum - (n / 2) * logvar -def boxcox_normmax(values: list[float], max_iters: int = 100) -> float: +def _boxcox_normmax(values: list[float], max_iters: int = 100) -> float: """ Calculate the approximate optimal lambda parameter for BoxCox transformation that maximizes the log-likelihood. @@ -186,8 +185,8 @@ def boxcox_normmax(values: list[float], max_iters: int = 100) -> float: m1 = left + (right - left) / 3 m2 = right - (right - left) / 3 - llf_m1 = boxcox_llf(m1, values) - llf_m2 = boxcox_llf(m2, values) + llf_m1 = _boxcox_llf(m1, values) + llf_m2 = _boxcox_llf(m2, values) if llf_m1 > llf_m2: right = m2 @@ -220,29 +219,3 @@ def calculate_z_scores(values: list[float]) -> list[float]: return [0.0] * len(values) return [(x - mean_val) / std_dev for x in values] - - -def filter_by_z_score_threshold( - values: list[float], z_threshold: float = 1.5, lambda_param: float | None = None -) -> list[int]: - """ - Get indices of values that pass BoxCox + z-score filtering. - - This function applies BoxCox normalization to the values, - calculates z-scores, and returns indices where z-scores >= threshold. 
- - Parameters: - values: List of numerical values to filter - z_threshold: Minimum z-score threshold for inclusion - lambda_param: BoxCox lambda parameter (None for automatic selection) - - Returns: - List of indices that pass the filtering criteria - """ - if not values: - return [] - - transformed_values, _ = boxcox_transform(values, lambda_param) - z_scores = calculate_z_scores(transformed_values) - - return [i for i, z_score in enumerate(z_scores) if z_score >= z_threshold] diff --git a/src/sentry/seer/workflows/compare.py b/src/sentry/seer/workflows/compare.py index 9f7e774397f50d..211ac0ab1ea37f 100644 --- a/src/sentry/seer/workflows/compare.py +++ b/src/sentry/seer/workflows/compare.py @@ -2,14 +2,7 @@ from collections.abc import Callable, Generator, Mapping, Sequence from typing import TypeVar -from sentry.seer.math import ( - boxcox_transform, - entropy, - filter_by_z_score_threshold, - kl_divergence, - laplace_smooth, - rrf_score, -) +from sentry.seer.math import boxcox_transform, entropy, kl_divergence, laplace_smooth, rrf_score T = TypeVar("T") @@ -20,32 +13,6 @@ Score = tuple[str, float] -def filter_by_z_score( - data: Sequence[KeyedValueCount], z_threshold: float = 1.5, lambda_param: float | None = None -) -> list[KeyedValueCount]: - """ - Filter data by applying BoxCox transformation and z-score filtering. - - This function applies BoxCox normalization to the count values in the data, - calculates z-scores, and filters to keep only items with z-scores >= threshold. - - Parameters: - data: Sequence of (key, value, count) tuples - z_threshold: Minimum z-score threshold for inclusion - lambda_param: BoxCox lambda parameter (None for automatic selection) - - Returns: - Filtered list of (key, value, count) tuples - """ - if not data: - return [] - - counts = [count for _, _, count in data] - passing_indices = filter_by_z_score_threshold(counts, z_threshold, lambda_param) - - return [data[i] for i in passing_indices] - - def keyed_kl_score( baseline: Sequence[KeyedValueCount], outliers: Sequence[KeyedValueCount], @@ -80,8 +47,6 @@ def keyed_rrf_score( entropy_alpha: float = 0.2, kl_alpha: float = 0.8, offset: int = 60, - filter_rrf: bool = False, - z_threshold: float = 1.5, ) -> list[tuple[str, float]]: """ RRF score a multi-dimensional distribution of values. Returns a list of key, score pairs. @@ -111,26 +76,6 @@ def _scoring_fn(baseline: list[float], outliers: list[float]): entropy_scores.append(entropy_score) kl_scores.append(kl_score) - if filter_rrf: - normalized_entropy_scores, _ = boxcox_transform(entropy_scores) - normalized_kl_scores, _ = boxcox_transform(kl_scores) - - filtered_keys = [] - filtered_entropy_scores = [] - filtered_kl_scores = [] - - for i, (key, normalized_entropy_score, normalized_kl_score) in enumerate( - zip(keys, normalized_entropy_scores, normalized_kl_scores) - ): - if normalized_entropy_score > z_threshold or normalized_kl_score > z_threshold: - filtered_keys.append(key) - filtered_entropy_scores.append(entropy_scores[i]) - filtered_kl_scores.append(kl_scores[i]) - - keys = filtered_keys - entropy_scores = filtered_entropy_scores - kl_scores = filtered_kl_scores - return sorted( zip(keys, rrf_score(entropy_scores, kl_scores, entropy_alpha, kl_alpha, offset)), key=lambda k: k[1], @@ -255,6 +200,7 @@ def keyed_rrf_score_with_filter( ) -> list[tuple[str, float, bool]]: """ RRF score a multi-dimensional distribution of values. Returns a list of key, score pairs, and a mapping of if the key was filtered. 
+ The filtered keys are those that have a normalized entropy or kl score greater than the z_threshold. Duplicates are not tolerated. Sample distribution: diff --git a/tests/sentry/seer/test_math.py b/tests/sentry/seer/test_math.py index 66e3f03c1c5e21..af6e85b3ae0923 100644 --- a/tests/sentry/seer/test_math.py +++ b/tests/sentry/seer/test_math.py @@ -1,6 +1,8 @@ import math from sentry.seer.math import ( + boxcox_transform, + calculate_z_scores, entropy, kl_divergence, laplace_smooth, @@ -90,3 +92,64 @@ def test_rrf_score(): def test_rank_min(): assert rank_min(xs=[1, 2, 2, 2, 3], ascending=False) == [3, 2, 2, 2, 1] assert rank_min(xs=[1, 2, 2, 2, 3], ascending=True) == [1, 2, 2, 2, 3] + + +def test_boxcox_transform(): + # Test with lambda = 0 (log transformation) + values = [1.0, 2.0, 4.0, 8.0] + transformed, lambda_used = boxcox_transform(values, lambda_param=0.0) + expected = [math.log(v) for v in values] + assert lambda_used == 0.0 + for t, e in zip(transformed, expected): + assert math.isclose(t, e, rel_tol=1e-9) + + # Test with lambda = 1 (no transformation, just (x-1)/1 = x-1) + transformed, lambda_used = boxcox_transform(values, lambda_param=1.0) + expected = [v - 1.0 for v in values] + assert lambda_used == 1.0 + for t, e in zip(transformed, expected): + assert math.isclose(t, e, rel_tol=1e-9) + + # Test with lambda = 0.5 (square root transformation) + transformed, lambda_used = boxcox_transform(values, lambda_param=0.5) + expected = [(math.sqrt(v) - 1.0) / 0.5 for v in values] + assert lambda_used == 0.5 + for t, e in zip(transformed, expected): + assert math.isclose(t, e, rel_tol=1e-9) + + # Test auto lambda detection + transformed, lambda_used = boxcox_transform(values, lambda_param=None) + assert isinstance(lambda_used, float) + assert len(transformed) == len(values) + + # Test empty input + transformed, lambda_used = boxcox_transform([], lambda_param=0.0) + assert transformed == [] + assert lambda_used == 0.0 + + +def test_calculate_z_scores(): + values = [1.0, 2.0, 3.0, 4.0, 5.0] + z_scores = calculate_z_scores(values) + + expected_mean = 3.0 + expected_std = math.sqrt(2.0) + expected = [(v - expected_mean) / expected_std for v in values] + + assert len(z_scores) == len(values) + for z, e in zip(z_scores, expected): + assert math.isclose(z, e, rel_tol=1e-9) + + same_values = [5.0, 5.0, 5.0, 5.0] + z_scores = calculate_z_scores(same_values) + assert all(z == 0.0 for z in z_scores) + + assert calculate_z_scores([]) == [] + + single_z = calculate_z_scores([42.0]) + assert single_z == [0.0] + + simple_values = [0.0, 10.0] + z_scores = calculate_z_scores(simple_values) + assert math.isclose(z_scores[0], -1.0, rel_tol=1e-9) + assert math.isclose(z_scores[1], 1.0, rel_tol=1e-9) diff --git a/tests/sentry/seer/workflows/test_compare.py b/tests/sentry/seer/workflows/test_compare.py index 446e6a5168c7ad..f712089c73bf95 100644 --- a/tests/sentry/seer/workflows/test_compare.py +++ b/tests/sentry/seer/workflows/test_compare.py @@ -1,6 +1,10 @@ import math -from sentry.seer.workflows.compare import keyed_kl_score, keyed_rrf_score +from sentry.seer.workflows.compare import ( + keyed_kl_score, + keyed_rrf_score, + keyed_rrf_score_with_filter, +) def test_keyed_kl_score(): @@ -211,3 +215,105 @@ def test_small_support(): ) attributes = [s[0] for s in scores] assert attributes == ["country", "browser", "device"] + + +def test_keyed_rrf_score_with_filter_basic(): + """ + Test basic functionality of keyed_rrf_score_with_filter + """ + baseline = [ + ("key", "true", 10), + ("key", "false", 200), + 
("other", "true", 1000), + ("other", "false", 5000), + ] + outliers = [("key", "true", 10), ("other", "true", 100), ("other", "false", 500)] + + scores = keyed_rrf_score_with_filter( + baseline, + outliers, + total_baseline=sum(i[2] for i in baseline), + total_outliers=sum(i[2] for i in outliers), + z_threshold=1.5, + ) + + # Should return tuples of (key, score, filtered_boolean) + assert len(scores) == 2 + for key, score, filtered in scores: + assert isinstance(key, str) + assert isinstance(score, float) + assert isinstance(filtered, bool) + assert score >= 0 + + +def test_keyed_rrf_score_with_filter_threshold_behavior(): + """ + Test filtering behavior with different z_threshold values + """ + baseline = [ + ("key", "true", 10), + ("key", "false", 200), + ("other", "true", 1000), + ("other", "false", 5000), + ] + outliers = [("key", "true", 10), ("other", "true", 100), ("other", "false", 500)] + + # With high threshold, no keys should be filtered + high_threshold_scores = keyed_rrf_score_with_filter( + baseline, + outliers, + total_baseline=sum(i[2] for i in baseline), + total_outliers=sum(i[2] for i in outliers), + z_threshold=10.0, + ) + + for key, score, filtered in high_threshold_scores: + assert not filtered, f"Key {key} should not be filtered with high threshold" + + +def test_keyed_rrf_score_with_filter_empty_inputs(): + """ + Test with empty inputs + """ + scores = keyed_rrf_score_with_filter( + [], [], total_baseline=0, total_outliers=0, z_threshold=1.5 + ) + assert scores == [] + + +def test_keyed_rrf_score_with_filter_consistency_with_regular_rrf(): + """ + Test that the scores are consistent with keyed_rrf_score + """ + baseline = [ + ("key", "true", 10), + ("key", "false", 200), + ("other", "true", 1000), + ("other", "false", 5000), + ] + outliers = [("key", "true", 10), ("other", "true", 100), ("other", "false", 500)] + + # Get scores from both functions + filtered_scores = keyed_rrf_score_with_filter( + baseline, + outliers, + total_baseline=sum(i[2] for i in baseline), + total_outliers=sum(i[2] for i in outliers), + z_threshold=1.5, + ) + + regular_scores = keyed_rrf_score( + baseline, + outliers, + total_baseline=sum(i[2] for i in baseline), + total_outliers=sum(i[2] for i in outliers), + ) + + # Extract just the key-score pairs and sort them for comparison + filtered_key_scores = sorted([(key, score) for key, score, _ in filtered_scores]) + regular_key_scores = sorted(regular_scores) + + # The scores should be identical + for (key1, score1), (key2, score2) in zip(filtered_key_scores, regular_key_scores): + assert key1 == key2 + assert math.isclose(score1, score2, rel_tol=1e-9) From 1fd3aff72c9388d43a5cf85560c0e486e8ecb0c3 Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 16:05:28 -0700 Subject: [PATCH 5/9] typo --- src/sentry/seer/math.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index 794cf04f61ca96..9e792e5b37ce5b 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -161,7 +161,7 @@ def _boxcox_normmax(values: list[float], max_iters: int = 100) -> float: """ Calculate the approximate optimal lambda parameter for BoxCox transformation that maximizes the log-likelihood. - Uses MLE method with ternary search rather than Brent's methodfor efficient optimization. + Uses MLE method with ternary search rather than Brent's method for efficient optimization. 
Parameters: values: List of positive values From 18f9df8f609a5edf78bad8f102806d777d5056be Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 16:50:20 -0700 Subject: [PATCH 6/9] Ensure 0 is handled and update tests --- src/sentry/seer/math.py | 5 +++-- .../endpoints/test_organization_group_suspect_flags.py | 2 ++ tests/sentry/issues/test_suspect_flags.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py index 9e792e5b37ce5b..7287260c1d71a1 100644 --- a/src/sentry/seer/math.py +++ b/src/sentry/seer/math.py @@ -173,8 +173,9 @@ def _boxcox_normmax(values: list[float], max_iters: int = 100) -> float: if not values: return 0.0 - if any(v <= 0 for v in values): - raise ValueError("All values must be positive for BoxCox transformation") + min_value = min(values) + if min_value <= 0: + values = [v - min_value + 1 for v in values] left = -2.0 right = 2.0 diff --git a/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py b/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py index 5f033923f93372..2ec27ab743ca81 100644 --- a/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py +++ b/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py @@ -69,6 +69,7 @@ def test_get(self) -> None: "true": 1, }, }, + "is_filtered": True, }, { "flag": "other", @@ -82,6 +83,7 @@ def test_get(self) -> None: "false": 1, }, }, + "is_filtered": False, }, ] } diff --git a/tests/sentry/issues/test_suspect_flags.py b/tests/sentry/issues/test_suspect_flags.py index 23ee063ddfdabb..d01dff6299c3ef 100644 --- a/tests/sentry/issues/test_suspect_flags.py +++ b/tests/sentry/issues/test_suspect_flags.py @@ -132,11 +132,13 @@ def test_get_suspect_flag_scores(self) -> None: "baseline": {"false": 1, "true": 1}, "outliers": {"true": 1}, }, + "is_filtered": True, }, { "flag": "other", "score": 0.016181914331041776, "baseline_percent": 0, "distribution": {"baseline": {"false": 2}, "outliers": {"false": 1}}, + "is_filtered": False, }, ] From 271bb90f6cda14b4c53efe0accea28a303032ac7 Mon Sep 17 00:00:00 2001 From: Aayush Seth Date: Mon, 7 Jul 2025 17:31:36 -0700 Subject: [PATCH 7/9] use the correct values for z score calculation --- src/sentry/seer/workflows/compare.py | 22 ++++++++++++------- .../test_organization_group_suspect_flags.py | 2 +- tests/sentry/issues/test_suspect_flags.py | 2 +- tests/sentry/seer/workflows/test_compare.py | 4 ++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/sentry/seer/workflows/compare.py b/src/sentry/seer/workflows/compare.py index 211ac0ab1ea37f..fc6f4b3ef43404 100644 --- a/src/sentry/seer/workflows/compare.py +++ b/src/sentry/seer/workflows/compare.py @@ -2,7 +2,14 @@ from collections.abc import Callable, Generator, Mapping, Sequence from typing import TypeVar -from sentry.seer.math import boxcox_transform, entropy, kl_divergence, laplace_smooth, rrf_score +from sentry.seer.math import ( + boxcox_transform, + calculate_z_scores, + entropy, + kl_divergence, + laplace_smooth, + rrf_score, +) T = TypeVar("T") @@ -232,14 +239,13 @@ def _scoring_fn(baseline: list[float], outliers: list[float]): normalized_entropy_scores, _ = boxcox_transform(entropy_scores) normalized_kl_scores, _ = boxcox_transform(kl_scores) + entropy_z_scores = calculate_z_scores(normalized_entropy_scores) + kl_z_scores = calculate_z_scores(normalized_kl_scores) - filtered_keys = [False] * len(keys) - - for i, (key, normalized_entropy_score, normalized_kl_score) in 
enumerate(
-        zip(keys, normalized_entropy_scores, normalized_kl_scores)
-    ):
-        if normalized_entropy_score > z_threshold or normalized_kl_score > z_threshold:
-            filtered_keys[i] = True
+    filtered_keys = [
+        entropy_z_score <= z_threshold or kl_z_score <= z_threshold
+        for entropy_z_score, kl_z_score in zip(entropy_z_scores, kl_z_scores)
+    ]
 
     return sorted(
         zip(
diff --git a/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py b/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py
index 2ec27ab743ca81..8e662ecf7c43be 100644
--- a/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py
+++ b/tests/sentry/issues/endpoints/test_organization_group_suspect_flags.py
@@ -83,7 +83,7 @@ def test_get(self) -> None:
                         "false": 1,
                     },
                 },
-                "is_filtered": False,
+                "is_filtered": True,
             },
         ]
     }
diff --git a/tests/sentry/issues/test_suspect_flags.py b/tests/sentry/issues/test_suspect_flags.py
index d01dff6299c3ef..da50859f57c336 100644
--- a/tests/sentry/issues/test_suspect_flags.py
+++ b/tests/sentry/issues/test_suspect_flags.py
@@ -139,6 +139,6 @@ def test_get_suspect_flag_scores(self) -> None:
             "score": 0.016181914331041776,
             "baseline_percent": 0,
             "distribution": {"baseline": {"false": 2}, "outliers": {"false": 1}},
-            "is_filtered": False,
+            "is_filtered": True,
         },
     ]
diff --git a/tests/sentry/seer/workflows/test_compare.py b/tests/sentry/seer/workflows/test_compare.py
index f712089c73bf95..404e168c49de06 100644
--- a/tests/sentry/seer/workflows/test_compare.py
+++ b/tests/sentry/seer/workflows/test_compare.py
@@ -258,13 +258,13 @@ def test_keyed_rrf_score_with_filter_threshold_behavior():
     ]
     outliers = [("key", "true", 10), ("other", "true", 100), ("other", "false", 500)]
 
-    # With high threshold, no keys should be filtered
+    # With low threshold, no keys should be filtered
     high_threshold_scores = keyed_rrf_score_with_filter(
         baseline,
         outliers,
         total_baseline=sum(i[2] for i in baseline),
         total_outliers=sum(i[2] for i in outliers),
-        z_threshold=10.0,
+        z_threshold=-10.0,
     )
 
     for key, score, filtered in high_threshold_scores:
From aa9645542b6cdb284b7488992c45ad1f3373628b Mon Sep 17 00:00:00 2001
From: Aayush Seth
Date: Mon, 7 Jul 2025 21:31:37 -0700
Subject: [PATCH 8/9] bugs

---
 src/sentry/seer/math.py              | 18 ++++++++++++++----
 src/sentry/seer/workflows/compare.py |  4 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py
index 7287260c1d71a1..578ef17d9138bc 100644
--- a/src/sentry/seer/math.py
+++ b/src/sentry/seer/math.py
@@ -101,20 +101,30 @@ def boxcox_transform(
     Returns:
         Tuple of (transformed values, lambda parameter used)
     """
+    min_value = min(values) if values else 0
+    if min_value <= 0:
+        shift_amount = -min_value + 1
+        shifted_values = [v + shift_amount for v in values]
+    else:
+        shifted_values = values
 
     if lambda_param is not None:
         if lambda_param == 0.0:
-            transformed = [math.log(max(v, 1e-10)) for v in values]
+            transformed = [math.log(max(v, 1e-10)) for v in shifted_values]
         else:
-            transformed = [(pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in values]
+            transformed = [
+                (pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in shifted_values
+            ]
         return transformed, lambda_param
 
     optimal_lambda = _boxcox_normmax(values)
 
     if optimal_lambda == 0.0:
-        transformed = [math.log(max(v, 1e-10)) for v in values]
+        transformed = [math.log(max(v, 1e-10)) for v in shifted_values]
     else:
-        transformed = [(pow(max(v, 1e-10), optimal_lambda) - 1) / optimal_lambda for v in values]
+        transformed = [
+            (pow(max(v, 1e-10), optimal_lambda) - 1) / optimal_lambda for v in shifted_values
+        ]
 
     return transformed, optimal_lambda
 
diff --git a/src/sentry/seer/workflows/compare.py b/src/sentry/seer/workflows/compare.py
index fc6f4b3ef43404..4765d7226ce179 100644
--- a/src/sentry/seer/workflows/compare.py
+++ b/src/sentry/seer/workflows/compare.py
@@ -207,7 +207,7 @@ def keyed_rrf_score_with_filter(
 ) -> list[tuple[str, float, bool]]:
     """
     RRF score a multi-dimensional distribution of values. Returns a list of key, score pairs, and a mapping of if the key was filtered.
-    The filtered keys are those that have a normalized entropy or kl score greater than the z_threshold.
+    The filtered keys are those that have a normalized entropy and kl score less than the z_threshold.
     Duplicates are not tolerated.
 
     Sample distribution:
@@ -243,7 +243,7 @@ def _scoring_fn(baseline: list[float], outliers: list[float]):
     kl_z_scores = calculate_z_scores(normalized_kl_scores)
 
     filtered_keys = [
-        entropy_z_score <= z_threshold or kl_z_score <= z_threshold
+        entropy_z_score <= z_threshold and kl_z_score <= z_threshold
         for entropy_z_score, kl_z_score in zip(entropy_z_scores, kl_z_scores)
     ]
 
From 93d2f054a45ecb71e88eb66788574a50227d2ce2 Mon Sep 17 00:00:00 2001
From: Aayush Seth
Date: Tue, 8 Jul 2025 11:35:45 -0700
Subject: [PATCH 9/9] update using shifted values and clean up boxcox function

---
 src/sentry/seer/math.py | 25 +++++++-------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/sentry/seer/math.py b/src/sentry/seer/math.py
index 578ef17d9138bc..e0f433f7e74f5e 100644
--- a/src/sentry/seer/math.py
+++ b/src/sentry/seer/math.py
@@ -103,30 +103,23 @@ def boxcox_transform(
     """
     min_value = min(values) if values else 0
     if min_value <= 0:
-        shift_amount = -min_value + 1
+        shift_amount = -min_value + 1e-10
         shifted_values = [v + shift_amount for v in values]
     else:
         shifted_values = values
 
-    if lambda_param is not None:
-        if lambda_param == 0.0:
-            transformed = [math.log(max(v, 1e-10)) for v in shifted_values]
-        else:
-            transformed = [
-                (pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in shifted_values
-            ]
-        return transformed, lambda_param
+    # Get lambda parameter: use provided one or find optimal
+    lambda_param = _boxcox_normmax(shifted_values) if lambda_param is None else lambda_param
 
-    optimal_lambda = _boxcox_normmax(values)
-
-    if optimal_lambda == 0.0:
+    # Apply transformation
+    if lambda_param == 0.0:
         transformed = [math.log(max(v, 1e-10)) for v in shifted_values]
     else:
         transformed = [
-            (pow(max(v, 1e-10), optimal_lambda) - 1) / optimal_lambda for v in shifted_values
+            (pow(max(v, 1e-10), lambda_param) - 1) / lambda_param for v in shifted_values
         ]
 
-    return transformed, optimal_lambda
+    return transformed, lambda_param
 
 
 def _boxcox_llf(lambda_param: float, values: list[float]) -> float:
@@ -183,10 +176,6 @@ def _boxcox_normmax(values: list[float], max_iters: int = 100) -> float:
     if not values:
         return 0.0
 
-    min_value = min(values)
-    if min_value <= 0:
-        values = [v - min_value + 1 for v in values]
-
     left = -2.0
     right = 2.0
     tolerance = 1e-6