open-edge-platform
diff --git a/‎src/anomalib/metrics/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎src/anomalib/metrics/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/anomalib/metrics/anomaly_score_distribution.py‎
Lines changed: 70 additions & 8 deletions b/‎src/anomalib/metrics/anomaly_score_distribution.py‎
Lines changed: 70 additions & 8 deletions
diff --git a/‎src/anomalib/metrics/pg_pb.py‎
Lines changed: 219 additions & 0 deletions b/‎src/anomalib/metrics/pg_pb.py‎
Lines changed: 219 additions & 0 deletions
@@ -25,6 +25,8 @@
     - ``BinaryPrecisionRecallCurve``: Computes precision-recall curves
     - ``Evaluator``: Combines multiple metrics for evaluation
     - ``MinMax``: Normalizes scores to [0,1] range
+    - ``PBn``: Presorted bad with n% good samples misclassified
+    - ``PGn``: Presorted good with n% bad samples missed
     - ``PRO``: Per-Region Overlap score
     - ``PIMO``: Per-Image Missed Overlap score
 
@@ -56,6 +58,7 @@
 from .evaluator import Evaluator
 from .f1_score import F1Max, F1Score
 from .min_max import MinMax
+from .pg_pb import PBn, PGn
 from .pimo import AUPIMO, PIMO
 from .precision_recall_curve import BinaryPrecisionRecallCurve
 from .pro import PRO
@@ -75,6 +78,8 @@
     "F1Score",
     "ManualThreshold",
     "MinMax",
+    "PGn",
+    "PBn",
     "PRO",
     "PIMO",
     "AUPIMO",
 
@@ -3,9 +3,11 @@
 
 """Compute statistics of anomaly score distributions.
 
-This module provides the ``AnomalyScoreDistribution`` class which computes mean
-and standard deviation statistics of anomaly scores from normal training data.
+This module provides the ``AnomalyScoreDistribution`` class, which computes the mean
+and standard deviation statistics of anomaly scores.
 Statistics are computed for both image-level and pixel-level scores.
+The ``plot`` method generates a histogram of anomaly scores,
+separated by label, to visualize score distributions for normal and abnormal samples.
 
 The class tracks:
     - Image-level statistics: Mean and std of image anomaly scores
@@ -17,29 +19,34 @@
     >>> # Create sample data
     >>> scores = torch.tensor([0.1, 0.2, 0.15])  # Image anomaly scores
     >>> maps = torch.tensor([[0.1, 0.2], [0.15, 0.25]])  # Pixel anomaly maps
+    >>> labels = torch.tensor([0, 1, 0])  # Binary labels
     >>> # Initialize and compute stats
     >>> dist = AnomalyScoreDistribution()
-    >>> dist.update(anomaly_scores=scores, anomaly_maps=maps)
+    >>> dist.update(anomaly_scores=scores, anomaly_maps=maps, labels=labels)
     >>> image_mean, image_std, pixel_mean, pixel_std = dist.compute()
+    >>> fig, title = dist.plot()
 
 Note:
     The input scores and maps are log-transformed before computing statistics.
-    Both image-level scores and pixel-level maps are optional inputs.
+    Image-level scores, pixel-level maps, and labels are optional inputs.
 """
 
 import torch
+from matplotlib.figure import Figure
 from torchmetrics import Metric
 
+from .utils import plot_score_histogram
+
 
 class AnomalyScoreDistribution(Metric):
     """Compute distribution statistics of anomaly scores.
 
     This class tracks and computes the mean and standard deviation of anomaly
-    scores from the normal samples in the training set. Statistics are computed
-    for both image-level scores and pixel-level anomaly maps.
+    scores. Statistics are computed for both image-level scores and pixel-level
+    anomaly maps.
 
-    The metric maintains internal state to accumulate scores and maps across
-    batches before computing final statistics.
+    The metric maintains internal state to accumulate scores, anomaly maps,
+    and labels across batches before computing final statistics.
 
     Example:
         >>> dist = AnomalyScoreDistribution()
@@ -59,6 +66,7 @@ def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
         self.anomaly_maps: list[torch.Tensor] = []
         self.anomaly_scores: list[torch.Tensor] = []
+        self.labels: list[torch.Tensor] = []
 
         self.add_state("image_mean", torch.empty(0), persistent=True)
         self.add_state("image_std", torch.empty(0), persistent=True)
@@ -75,6 +83,7 @@ def update(
         *args,
         anomaly_scores: torch.Tensor | None = None,
         anomaly_maps: torch.Tensor | None = None,
+        labels: torch.Tensor | None = None,
         **kwargs,
     ) -> None:
         """Update the internal state with new scores and maps.
@@ -83,6 +92,7 @@ def update(
             *args: Unused positional arguments.
             anomaly_scores: Batch of image-level anomaly scores.
             anomaly_maps: Batch of pixel-level anomaly maps.
+            labels: Batch of binary labels.
             **kwargs: Unused keyword arguments.
         """
         del args, kwargs  # These variables are not used.
@@ -91,6 +101,8 @@ def update(
             self.anomaly_maps.append(anomaly_maps)
         if anomaly_scores is not None:
             self.anomaly_scores.append(anomaly_scores)
+        if labels is not None:
+            self.labels.append(labels)
 
     def compute(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """Compute distribution statistics from accumulated scores and maps.
@@ -116,3 +128,53 @@ def compute(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tenso
             self.pixel_std = anomaly_maps.std(dim=0).squeeze()
 
         return self.image_mean, self.image_std, self.pixel_mean, self.pixel_std
+
+    def plot(
+        self,
+        bins: int = 30,
+        good_color: str = "skyblue",
+        bad_color: str = "salmon",
+        xlabel: str = "Score",
+        ylabel: str = "Relative Count",
+        title: str = "Score Histogram",
+        legend_labels: tuple[str, str] = ("Good", "Bad"),
+    ) -> tuple[Figure, str]:
+        """Generate a histogram of scores.
+
+        Args:
+            bins (int, optional): Number of histogram bins. Defaults to 30.
+            good_color (str, optional): Color for good samples. Defaults to "skyblue".
+            bad_color (str, optional): Color for bad samples. Defaults to "salmon".
+            xlabel (str, optional): Label for the x-axis. Defaults to "Score".
+            ylabel (str, optional): Label for the y-axis. Defaults to "Relative Count".
+            title (str, optional): Title of the plot. Defaults to "Score Histogram".
+            legend_labels (tuple[str, str], optional): Legend labels for good and bad samples.
+                Defaults to ("Good", "Bad").
+
+        Returns:
+            tuple[Figure, str]: Tuple containing both the figure and the figure
+                title to be used for logging
+
+        Raises:
+            ValueError: If no anomaly scores or labels are available.
+        """
+        if len(self.anomaly_scores) == 0:
+            msg = "No anomaly scores available."
+            raise ValueError(msg)
+        if len(self.labels) == 0:
+            msg = "No labels available."
+            raise ValueError(msg)
+
+        fig, _ = plot_score_histogram(
+            scores=torch.hstack(self.anomaly_scores),
+            labels=torch.hstack(self.labels),
+            bins=bins,
+            good_color=good_color,
+            bad_color=bad_color,
+            xlabel=xlabel,
+            ylabel=ylabel,
+            title=title,
+            legend_labels=legend_labels,
+        )
+
+        return fig, title
@@ -0,0 +1,219 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""PGn and PBn metrics for binary image-level classification tasks.
+
+This module provides two metrics for evaluating binary image-level classification performance
+under the assumption that bad (anomalous) samples are the positive class:
+
+- ``PGn``: Presorted good with n% bad samples missed; can be interpreted as the true negative rate
+  at a fixed false negative rate (TNR@nFNR).
+- ``PBn``: Presorted bad with n% good samples misclassified; can be interpreted as the true positive rate
+  at a fixed false positive rate (TPR@nFPR).
+
+These metrics emphasize the practical applications of anomaly detection models by showing their potential
+to reduce human operator workload while maintaining an acceptable level of misclassification.
+
+Example:
+    >>> from anomalib.metrics import PGn, PBn
+    >>> from anomalib.data import ImageBatch
+    >>> import torch
+    >>> # Create sample batch
+    >>> batch = ImageBatch(
+    ...     image=torch.rand(4, 3, 32, 32),
+    ...     pred_score=torch.tensor([0.1, 0.4, 0.35, 0.8]),
+    ...     gt_label=torch.tensor([0, 0, 1, 1])
+    ... )
+    >>> pg = PGn(fnr=0.2)
+    >>> # Print name of the metric
+    >>> print(pg.name)
+    PG20
+    >>> # Compute PGn score
+    >>> pg.update(batch)
+    >>> pg.compute()
+    tensor(1.)
+    >>> pb = PBn(fpr=0.2)
+    >>> # Print name of the metric
+    >>> print(pb.name)
+    PB20
+    >>> # Compute PBn score
+    >>> pb.update(batch)
+    >>> pb.compute()
+    tensor(1.)
+
+Note:
+    Scores for both metrics range from 0 to 1, with 1 indicating perfect separation
+    of the respective class with ``n``% or less of the other class misclassified.
+
+Reference:
+    Aimira Baitieva, Yacine Bouaouni, Alexandre Briot, Dick Ameln, Souhaiel Khalfaoui,
+    Samet Akcay; Beyond Academic Benchmarks: Critical Analysis and Best Practices
+    for Visual Industrial Anomaly Detection; in: Proceedings of the IEEE/CVF Conference
+    on Computer Vision and Pattern Recognition (CVPR) Workshops, 2025, pp. 4024-4034,
+    https://arxiv.org/abs/2503.23451
+"""
+
+import torch
+from torchmetrics import Metric
+from torchmetrics.utilities import dim_zero_cat
+
+from anomalib.metrics.base import AnomalibMetric
+
+
+class _PGn(Metric):
+    """Presorted good metric.
+
+    This class calculates the Presorted good (PGn) metric, which is the true negative rate
+    at a fixed false negative rate.
+
+    Args:
+        **kwargs: Additional arguments passed to the parent ``Metric`` class.
+
+    Attributes:
+        fnr (torch.Tensor): Fixed false negative rate (bad parts misclassified).
+        Defaults to ``0.05``.
+
+    Example:
+        >>> from anomalib.metrics.pg_pb import _PGn
+        >>> import torch
+        >>> # Create sample data
+        >>> preds = torch.tensor([0.1, 0.4, 0.35, 0.8])
+        >>> target = torch.tensor([0, 0, 1, 1])
+        >>> # Compute PGn score
+        >>> pg = _PGn(fnr=0.2)
+        >>> pg.update(preds, target)
+        >>> pg.compute()
+        tensor(1.0)
+    """
+
+    def __init__(self, fnr: float = 0.05, **kwargs) -> None:
+        super().__init__(**kwargs)
+        if fnr < 0 or fnr > 1:
+            msg = f"False negative rate must be in the range between 0 and 1, got {fnr}."
+            raise ValueError(msg)
+
+        self.fnr = torch.tensor(fnr, dtype=torch.float32)
+        self.name = "PG" + str(int(fnr * 100))
+
+        self.add_state("preds", default=[], dist_reduce_fx="cat")
+        self.add_state("target", default=[], dist_reduce_fx="cat")
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
+        """Update state with new values.
+
+        Args:
+            preds (torch.Tensor): predictions of the model
+            target (torch.Tensor): ground truth targets
+        """
+        self.target.append(target)
+        self.preds.append(preds)
+
+    def compute(self) -> torch.Tensor:
+        """Compute the PGn score at a given false negative rate.
+
+        Returns:
+            torch.Tensor: PGn score value.
+
+        Raises:
+            ValueError: If no negative samples are found.
+        """
+        preds = dim_zero_cat(self.preds)
+        target = dim_zero_cat(self.target)
+
+        pos_scores = preds[target == 1]
+        thr_accept = torch.quantile(pos_scores, self.fnr)
+
+        neg_scores = preds[target == 0]
+        if neg_scores.numel() == 0:
+            msg = "No negative samples found. Cannot compute PGn score."
+            raise ValueError(msg)
+        pg = neg_scores[neg_scores < thr_accept].numel() / neg_scores.numel()
+
+        return torch.tensor(pg, dtype=preds.dtype)
+
+
+class PGn(AnomalibMetric, _PGn):  # type: ignore[misc]
+    """Wrapper to add AnomalibMetric functionality to PGn metric.
+
+    This class wraps the internal ``_PGn`` metric to make it compatible with
+    Anomalib's batch processing capabilities. It adds no logic of its own: the
+    PGn computation is inherited from ``_PGn``.
+    """
+
+    # Field names supplying the ``preds`` and ``target`` inputs of ``_PGn.update``
+    # (presumably read from the batch by ``AnomalibMetric`` - confirm against the base class).
+    default_fields = ("pred_score", "gt_label")
+
+
+class _PBn(Metric):
+    """Presorted bad metric.
+
+    This class calculates the Presorted bad (PBn) metric, which is the true positive rate
+    at a fixed false positive rate.
+
+    Args:
+        fpr (float): Fixed false positive rate (good parts misclassified). Defaults to ``0.05``.
+        **kwargs: Additional arguments passed to the parent ``Metric`` class.
+
+    Example:
+        >>> from anomalib.metrics import _PBn
+        >>> import torch
+        >>> preds = torch.tensor([0.1, 0.4, 0.35, 0.8])
+        >>> target = torch.tensor([0, 0, 1, 1])
+        >>> pb = _PBn(fpr=0.2)
+        >>> pb.update(preds, target)
+        >>> pb.compute()
+        tensor(1.0)
+    """
+
+    def __init__(self, fpr: float = 0.05, **kwargs) -> None:
+        super().__init__(**kwargs)
+        if fpr < 0 or fpr > 1:
+            msg = f"False positive rate must be in the range between 0 and 1, got {fpr}."
+            raise ValueError(msg)
+
+        self.fpr = torch.tensor(fpr, dtype=torch.float32)
+        self.name = "PB" + str(int(fpr * 100))
+
+        self.add_state("preds", default=[], dist_reduce_fx="cat")
+        self.add_state("target", default=[], dist_reduce_fx="cat")
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
+        """Update state with new values.
+
+        Args:
+            preds (torch.Tensor): predictions of the model
+            target (torch.Tensor): ground truth targets
+        """
+        self.target.append(target)
+        self.preds.append(preds)
+
+    def compute(self) -> torch.Tensor:
+        """Compute the PBn score at a given false positive rate.
+
+        Returns:
+            torch.Tensor: PBn score value.
+
+        Raises:
+            ValueError: If no positive samples are found.
+        """
+        preds = dim_zero_cat(self.preds)
+        target = dim_zero_cat(self.target)
+
+        neg_scores = preds[target == 0]
+        thr_accept = torch.quantile(neg_scores, 1 - self.fpr)
+
+        pos_scores = preds[target == 1]
+        if pos_scores.numel() == 0:
+            msg = "No positive samples found. Cannot compute PBn score."
+            raise ValueError(msg)
+        pb = pos_scores[pos_scores > thr_accept].numel() / pos_scores.numel()
+
+        return torch.tensor(pb, dtype=preds.dtype)
+
+
+class PBn(AnomalibMetric, _PBn):  # type: ignore[misc]
+    """Wrapper to add AnomalibMetric functionality to PBn metric.
+
+    This class wraps the internal ``_PBn`` metric to make it compatible with
+    Anomalib's batch processing capabilities. It adds no logic of its own: the
+    PBn computation is inherited from ``_PBn``.
+    """
+
+    # Field names supplying the ``preds`` and ``target`` inputs of ``_PBn.update``
+    # (presumably read from the batch by ``AnomalibMetric`` - confirm against the base class).
+    default_fields = ("pred_score", "gt_label")