diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py
index 5b06120b2..e12ed2044 100644
--- a/supervision/annotators/core.py
+++ b/supervision/annotators/core.py
@@ -3,7 +3,7 @@
 
 import cv2
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageDraw, ImageFont
 
 from supervision.annotators.base import BaseAnnotator, ImageType
 from supervision.annotators.utils import (
@@ -18,7 +18,10 @@ from supervision.draw.color import Color, ColorPalette
 from supervision.draw.utils import draw_polygon
 from supervision.geometry.core import Position
-from supervision.utils.conversion import convert_for_annotation_method
+from supervision.utils.conversion import (
+    ensure_cv2_image_for_annotation,
+    ensure_pil_image_for_annotation,
+)
 from supervision.utils.image import crop_image, overlay_image, scale_image
@@ -45,7 +48,7 @@ def __init__(
         self.thickness: int = thickness
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -126,7 +129,7 @@ def __init__(
         self.thickness: int = thickness
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -214,7 +217,7 @@ def __init__(
         self.opacity = opacity
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -270,8 +273,10 @@ def annotate(
             mask = detections.mask[detection_idx]
             colored_mask[mask] = color.as_bgr()
 
-        scene = cv2.addWeighted(colored_mask, self.opacity, scene, 1 - self.opacity, 0)
-        return scene.astype(np.uint8)
+        cv2.addWeighted(
+            colored_mask, self.opacity, scene, 1 - self.opacity, 0, dst=scene
+        )
+        return scene
 
 
 class PolygonAnnotator(BaseAnnotator):
@@ -301,7 +306,7 @@ def __init__(
         self.thickness: int = thickness
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -387,7 +392,7 @@ def __init__(
         self.color_lookup: ColorLookup = color_lookup
         self.opacity = opacity
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -426,7 +431,7 @@ def annotate(
         ![box-mask-annotator-example](https://media.roboflow.com/
         supervision-annotator-examples/box-mask-annotator-example-purple.png)
         """
-        mask_image = scene.copy()
+        scene_with_boxes = scene.copy()
         for detection_idx in range(len(detections)):
             x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
             color = resolve_color(
@@ -438,14 +443,15 @@ def annotate(
                 else custom_color_lookup,
             )
             cv2.rectangle(
-                img=scene,
+                img=scene_with_boxes,
                 pt1=(x1, y1),
                 pt2=(x2, y2),
                 color=color.as_bgr(),
                 thickness=-1,
             )
-        scene = cv2.addWeighted(
-            scene, self.opacity, mask_image, 1 - self.opacity, gamma=0
+
+        cv2.addWeighted(
+            scene_with_boxes, self.opacity, scene, 1 - self.opacity, gamma=0, dst=scene
         )
         return scene
@@ -481,7 +487,7 @@ def __init__(
         self.color_lookup: ColorLookup = color_lookup
         self.kernel_size: int = kernel_size
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -546,7 +552,8 @@ def annotate(
         gray = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2GRAY)
         alpha = self.opacity * gray / gray.max()
         alpha_mask = alpha[:, :, np.newaxis]
-        scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)
+        blended_scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)
+        np.copyto(scene, blended_scene)
         return scene
@@ -579,7 +586,7 @@ def __init__(
         self.end_angle: int = end_angle
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -670,7 +677,7 @@ def __init__(
         self.corner_length: int = corner_length
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -758,7 +765,7 @@ def __init__(
         self.thickness: int = thickness
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -851,7 +858,7 @@ def __init__(
         self.color_lookup: ColorLookup = color_lookup
         self.outline_thickness = outline_thickness
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
@@ -910,7 +917,7 @@ def annotate(
         return scene
 
 
-class LabelAnnotator:
+class LabelAnnotator(BaseAnnotator):
     """
     A class for annotating labels on an image using provided detections.
     """
@@ -950,12 +957,12 @@ def __init__(
         self.text_anchor: Position = text_position
         self.color_lookup: ColorLookup = color_lookup
 
-    @convert_for_annotation_method
+    @ensure_cv2_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
         detections: Detections,
-        labels: List[str] = None,
+        labels: Optional[List[str]] = None,
         custom_color_lookup: Optional[np.ndarray] = None,
     ) -> ImageType:
         """
@@ -966,7 +973,7 @@ def annotate(
             `ImageType` is a flexible type, accepting either `numpy.ndarray`
                 or `PIL.Image.Image`.
             detections (Detections): Object detections to annotate.
-            labels (List[str]): Optional. Custom labels for each detection.
+            labels (Optional[List[str]]): Custom labels for each detection.
             custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
                 Allows to override the default color mapping strategy.
@@ -976,7 +983,7 @@ def annotate(
 
         Example:
            ```python
-            import supervision as sv 
+            import supervision as sv
 
            image = ...
            detections = sv.Detections(...)
@@ -1004,12 +1011,9 @@ def annotate(
         ).astype(int)
         if labels is not None and len(labels) != len(detections):
             raise ValueError(
-                f"The number of labels provided ({len(labels)}) does not match the "
-                f"number of detections ({len(detections)}). Each detection should have "
-                f"a corresponding label. This discrepancy can occur if the labels and "
-                f"detections are not aligned or if an incorrect number of labels has "
-                f"been provided. Please ensure that the labels array has the same "
-                f"length as the Detections object."
+                f"The number of labels ({len(labels)}) does not match the "
+                f"number of detections ({len(detections)}). Each detection "
+                f"should have exactly 1 label."
             )
 
         for detection_idx, center_coordinates in enumerate(anchors_coordinates):
@@ -1111,7 +1115,7 @@ def draw_rounded_rectangle(
         return scene
 
 
-class RichLabelAnnotator:
+class RichLabelAnnotator(BaseAnnotator):
     """
     A class for annotating labels on an image using provided detections,
     with support for Unicode characters by using a custom font.
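Note: the hunks above all switch to the same in-place pattern. Results are written into the caller's buffer, via `dst=scene` or `np.copyto`, instead of rebinding a local name that the caller never sees. A minimal standalone sketch of both idioms, using toy arrays rather than anything from the patch:

```python
import cv2
import numpy as np

scene = np.full((2, 2, 3), 200, dtype=np.uint8)
overlay = np.zeros_like(scene)

# dst=scene writes the blend into the existing buffer, so the caller's
# array is updated without rebinding the local variable
cv2.addWeighted(overlay, 0.5, scene, 0.5, 0, dst=scene)

# np.uint8(...) allocates a new array, so the result has to be copied
# back into the original buffer to stay visible to the caller
blended = np.uint8(scene * (1 - 0.5))
np.copyto(scene, blended)
```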
@@ -1121,7 +1125,7 @@ def __init__(
         self,
         color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
         text_color: Color = Color.WHITE,
-        font_path: str = None,
+        font_path: Optional[str] = None,
         font_size: int = 10,
         text_padding: int = 10,
         text_position: Position = Position.TOP_LEFT,
@@ -1133,8 +1137,8 @@ def __init__(
             color (Union[Color, ColorPalette]): The color or color palette to use
                 for annotating the text background.
             text_color (Color): The color to use for the text.
-            font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
-                rendering text. If `None`, the default PIL font will be used.
+            font_path (Optional[str]): Path to the font file (e.g., ".ttf" or ".otf")
+                to use for rendering text. If `None`, the default PIL font will be used.
             font_size (int): Font size for the text.
             text_padding (int): Padding around the text within its background box.
             text_position (Position): Position of the text relative to the detection.
@@ -1155,15 +1159,16 @@ def __init__(
                 self.font = ImageFont.truetype(font_path, font_size)
             except OSError:
                 print(f"Font path '{font_path}' not found. Using PIL's default font.")
-                self.font = ImageFont.load_default(size=font_size)
+                self.font = self._load_default_font(font_size)
         else:
-            self.font = ImageFont.load_default(size=font_size)
+            self.font = self._load_default_font(font_size)
 
+    @ensure_pil_image_for_annotation
     def annotate(
         self,
         scene: ImageType,
         detections: Detections,
-        labels: List[str] = None,
+        labels: Optional[List[str]] = None,
         custom_color_lookup: Optional[np.ndarray] = None,
     ) -> ImageType:
         """
@@ -1175,7 +1180,7 @@ def annotate(
             `ImageType` is a flexible type, accepting either `numpy.ndarray`
                 or `PIL.Image.Image`.
             detections (Detections): Object detections to annotate.
-            labels (List[str]): Optional. Custom labels for each detection.
+            labels (Optional[List[str]]): Custom labels for each detection.
             custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
                 Allows to override the default color mapping strategy.
@@ -1205,8 +1210,6 @@ def annotate(
         ```
         """
-        if isinstance(scene, np.ndarray):
-            scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
         draw = ImageDraw.Draw(scene)
         anchors_coordinates = detections.get_anchors_coordinates(
             anchor=self.text_anchor
@@ -1215,10 +1218,7 @@ def annotate(
             raise ValueError(
                 f"The number of labels provided ({len(labels)}) does not match the "
                 f"number of detections ({len(detections)}). Each detection should have "
-                f"a corresponding label. This discrepancy can occur if the labels and "
-                f"detections are not aligned or if an incorrect number of labels has "
-                f"been provided. Please ensure that the labels array has the same "
-                f"length as the Detections object."
+                f"a corresponding label."
             )
         for detection_idx, center_coordinates in enumerate(anchors_coordinates):
             color = resolve_color(
@@ -1266,9 +1266,21 @@ def annotate(
                 font=self.font,
                 fill=self.text_color.as_rgb(),
             )
         return scene
 
+    @staticmethod
+    def _load_default_font(size):
+        """
+        Newer versions of PIL accept a `size` argument in `load_default`,
+        while older versions (e.g. the one bundled with Colab) raise a
+        `TypeError` if any argument is passed.
+ """ + try: + font = ImageFont.load_default(size) + except TypeError: + font = ImageFont.load_default() + return font + class BlurAnnotator(BaseAnnotator): """ @@ -1282,7 +1294,7 @@ def __init__(self, kernel_size: int = 15): """ self.kernel_size: int = kernel_size - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -1331,7 +1343,7 @@ def annotate( return scene -class TraceAnnotator: +class TraceAnnotator(BaseAnnotator): """ A class for drawing trace paths on an image based on detection coordinates. @@ -1367,7 +1379,7 @@ def __init__( self.thickness = thickness self.color_lookup: ColorLookup = color_lookup - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -1440,7 +1452,7 @@ def annotate( return scene -class HeatMapAnnotator: +class HeatMapAnnotator(BaseAnnotator): """ A class for drawing heatmaps on an image based on provided detections. Heat accumulates over time and is drawn as a semi-transparent overlay @@ -1474,7 +1486,7 @@ def __init__( self.top_hue = top_hue self.low_hue = low_hue - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate(self, scene: ImageType, detections: Detections) -> ImageType: """ Annotates the scene with a heatmap based on the provided detections. @@ -1550,7 +1562,7 @@ def __init__(self, pixel_size: int = 20): """ self.pixel_size: int = pixel_size - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -1641,7 +1653,7 @@ def __init__( self.color_lookup: ColorLookup = color_lookup self.outline_thickness: int = outline_thickness - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -1740,7 +1752,7 @@ def __init__( raise ValueError("roundness attribute must be float between (0, 1.0]") self.roundness: float = roundness - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -1877,7 +1889,7 @@ def __init__( if border_thickness is None: self.border_thickness = int(0.15 * self.height) - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, @@ -2050,7 +2062,7 @@ def __init__( self.border_thickness: int = border_thickness self.border_color_lookup: ColorLookup = border_color_lookup - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, diff --git a/supervision/detection/annotate.py b/supervision/detection/annotate.py index f496b2487..eb52171c2 100644 --- a/supervision/detection/annotate.py +++ b/supervision/detection/annotate.py @@ -5,7 +5,7 @@ from supervision.annotators.base import ImageType from supervision.detection.core import Detections from supervision.draw.color import Color, ColorPalette -from supervision.utils.conversion import convert_for_annotation_method +from supervision.utils.conversion import ensure_cv2_image_for_annotation from supervision.utils.internal import deprecated @@ -46,7 +46,7 @@ def __init__( "`BoxAnnotator` is deprecated and will be removed in " "`supervision-0.22.0`. 
Use `BoundingBoxAnnotator` and `LabelAnnotator` instead" ) - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate( self, scene: ImageType, diff --git a/supervision/keypoint/annotators.py b/supervision/keypoint/annotators.py index e6ff1fcfd..256b1b621 100644 --- a/supervision/keypoint/annotators.py +++ b/supervision/keypoint/annotators.py @@ -11,7 +11,7 @@ from supervision.draw.utils import draw_rounded_rectangle from supervision.keypoint.core import KeyPoints from supervision.keypoint.skeletons import SKELETONS_BY_VERTEX_COUNT -from supervision.utils.conversion import convert_for_annotation_method +from supervision.utils.conversion import ensure_cv2_image_for_annotation class BaseKeyPointAnnotator(ABC): @@ -41,7 +41,7 @@ def __init__( self.color = color self.radius = radius - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType: """ Annotates the given scene with skeleton vertices based on the provided key @@ -117,7 +117,7 @@ def __init__( self.thickness = thickness self.edges = edges - @convert_for_annotation_method + @ensure_cv2_image_for_annotation def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType: """ Annotates the given scene by drawing lines between specified key points to form diff --git a/supervision/utils/conversion.py b/supervision/utils/conversion.py index 8ddce9695..646d7dcf2 100644 --- a/supervision/utils/conversion.py +++ b/supervision/utils/conversion.py @@ -8,11 +8,13 @@ from supervision.annotators.base import ImageType -def convert_for_annotation_method(annotate_func): +def ensure_cv2_image_for_annotation(annotate_func): """ Decorates `BaseAnnotator.annotate` implementations, converts scene to an image type used internally by the annotators, converts back when annotation is complete. + + Assumes the annotators modify the scene in-place. """ @wraps(annotate_func) @@ -21,19 +23,22 @@ def wrapper(self, scene: ImageType, *args, **kwargs): return annotate_func(self, scene, *args, **kwargs) if isinstance(scene, Image.Image): - scene = pillow_to_cv2(scene) - annotated = annotate_func(self, scene, *args, **kwargs) - return cv2_to_pillow(image=annotated) + scene_np = pillow_to_cv2(scene) + annotated_np = annotate_func(self, scene_np, *args, **kwargs) + scene.paste(cv2_to_pillow(annotated_np)) + return scene raise ValueError(f"Unsupported image type: {type(scene)}") return wrapper -def convert_for_image_processing(image_processing_fun): +def ensure_cv2_image_for_processing(image_processing_fun): """ Decorates image processing functions that accept np.ndarray, converting `image` to np.ndarray, converts back when processing is complete. + + Assumes the annotators do NOT modify the scene in-place. """ @wraps(image_processing_fun) @@ -44,13 +49,37 @@ def wrapper(image: ImageType, *args, **kwargs): if isinstance(image, Image.Image): scene = pillow_to_cv2(image) annotated = image_processing_fun(scene, *args, **kwargs) - return cv2_to_pillow(image=annotated) + return cv2_to_pillow(annotated) raise ValueError(f"Unsupported image type: {type(image)}") return wrapper +def ensure_pil_image_for_annotation(annotate_func): + """ + Decorates image processing functions that accept np.ndarray, converting `image` to + PIL image, converts back when processing is complete. + + Assumes the annotators modify the scene in-place. 
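+
+    A rough sketch of the resulting contract (`annotator` and `detections`
+    here are hypothetical placeholders):
+
+    ```python
+    scene = np.zeros((128, 128, 3), dtype=np.uint8)
+    result = annotator.annotate(scene=scene, detections=detections)
+    assert result is scene  # pixels were written back in place via np.copyto
+    ```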
+ """ + + @wraps(annotate_func) + def wrapper(self, scene: ImageType, *args, **kwargs): + if isinstance(scene, np.ndarray): + scene_pil = cv2_to_pillow(scene) + annotated_pil = annotate_func(self, scene_pil, *args, **kwargs) + np.copyto(scene, pillow_to_cv2(annotated_pil)) + return scene + + if isinstance(scene, Image.Image): + return annotate_func(self, scene, *args, **kwargs) + + raise ValueError(f"Unsupported image type: {type(scene)}") + + return wrapper + + def images_to_cv2(images: List[ImageType]) -> List[np.ndarray]: """ Converts images provided either as Pillow images or OpenCV @@ -67,7 +96,7 @@ def images_to_cv2(images: List[ImageType]) -> List[np.ndarray]: result = [] for image in images: if issubclass(type(image), Image.Image): - image = pillow_to_cv2(image=image) + image = pillow_to_cv2(image) result.append(image) return result diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 9f2e17839..d69721893 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -14,8 +14,8 @@ from supervision.draw.utils import calculate_optimal_text_scale, draw_text from supervision.geometry.core import Point from supervision.utils.conversion import ( - convert_for_image_processing, cv2_to_pillow, + ensure_cv2_image_for_processing, images_to_cv2, ) from supervision.utils.iterables import create_batches, fill @@ -25,7 +25,7 @@ MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3 -@convert_for_image_processing +@ensure_cv2_image_for_processing def crop_image( image: ImageType, xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]], @@ -86,7 +86,7 @@ def crop_image( return image[y_min:y_max, x_min:x_max] -@convert_for_image_processing +@ensure_cv2_image_for_processing def scale_image(image: ImageType, scale_factor: float) -> ImageType: """ Scales the given image based on the given scale factor. 
@@ -143,7 +143,7 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType:
     return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)
 
 
-@convert_for_image_processing
+@ensure_cv2_image_for_processing
 def resize_image(
     image: ImageType,
     resolution_wh: Tuple[int, int],
@@ -216,7 +216,7 @@ def resize_image(
     return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)
 
 
-@convert_for_image_processing
+@ensure_cv2_image_for_processing
 def letterbox_image(
     image: ImageType,
     resolution_wh: Tuple[int, int],
diff --git a/test/utils/conftest.py b/test/utils/conftest.py
index cc134f8ce..4230571b2 100644
--- a/test/utils/conftest.py
+++ b/test/utils/conftest.py
@@ -11,7 +11,7 @@
 
 
 @fixture(scope="function")
-def empty_opencv_image() -> np.ndarray:
+def empty_cv2_image() -> np.ndarray:
     return np.zeros((128, 128, 3), dtype=np.uint8)
 
 
diff --git a/test/utils/test_conversion.py b/test/utils/test_conversion.py
index fb3d8faff..6ff0c630b 100644
--- a/test/utils/test_conversion.py
+++ b/test/utils/test_conversion.py
@@ -2,28 +2,28 @@
 from PIL import Image, ImageChops
 
 from supervision.utils.conversion import (
-    convert_for_image_processing,
     cv2_to_pillow,
+    ensure_cv2_image_for_processing,
     images_to_cv2,
     pillow_to_cv2,
 )
 
 
-def test_convert_for_image_processing_when_pillow_image_submitted(
-    empty_opencv_image: np.ndarray, empty_pillow_image: Image.Image
+def test_ensure_cv2_image_for_processing_when_pillow_image_submitted(
+    empty_cv2_image: np.ndarray, empty_pillow_image: Image.Image
 ) -> None:
     # given
     param_a_value = 3
     param_b_value = "some"
 
-    @convert_for_image_processing
+    @ensure_cv2_image_for_processing
     def my_custom_processing_function(
         image: np.ndarray,
         param_a: int,
         param_b: str,
     ) -> np.ndarray:
         assert np.allclose(
-            image, empty_opencv_image
+            image, empty_cv2_image
         ), "Expected conversion to OpenCV image to happen"
         assert (
             param_a == param_a_value
@@ -48,21 +48,21 @@ def my_custom_processing_function(
     )
 
 
-def test_convert_for_image_processing_when_opencv_image_submitted(
-    empty_opencv_image: np.ndarray,
+def test_ensure_cv2_image_for_processing_when_cv2_image_submitted(
+    empty_cv2_image: np.ndarray,
 ) -> None:
     # given
     param_a_value = 3
     param_b_value = "some"
 
-    @convert_for_image_processing
+    @ensure_cv2_image_for_processing
     def my_custom_processing_function(
         image: np.ndarray,
         param_a: int,
         param_b: str,
     ) -> np.ndarray:
         assert np.allclose(
-            image, empty_opencv_image
+            image, empty_cv2_image
         ), "Expected conversion to OpenCV image to happen"
         assert (
             param_a == param_a_value
@@ -74,22 +74,20 @@ def my_custom_processing_function(
 
     # when
     result = my_custom_processing_function(
-        empty_opencv_image,
+        empty_cv2_image,
         param_a_value,
         param_b=param_b_value,
     )
 
     # then
-    assert (
-        result is empty_opencv_image
-    ), "Expected to return OpenCV image without changes"
+    assert result is empty_cv2_image, "Expected to return OpenCV image without changes"
 
 
 def test_cv2_to_pillow(
-    empty_opencv_image: np.ndarray, empty_pillow_image: Image.Image
+    empty_cv2_image: np.ndarray, empty_pillow_image: Image.Image
 ) -> None:
     # when
-    result = cv2_to_pillow(image=empty_opencv_image)
+    result = cv2_to_pillow(image=empty_cv2_image)
 
     # then
     difference = ImageChops.difference(result, empty_pillow_image)
@@ -99,14 +97,14 @@ def test_cv2_to_pillow(
 
 
 def test_pillow_to_cv2(
-    empty_opencv_image: np.ndarray, empty_pillow_image: Image.Image
+    empty_cv2_image: np.ndarray, empty_pillow_image: Image.Image
 ) -> None:
     # when
     result = pillow_to_cv2(image=empty_pillow_image)
 
     # then
     assert np.allclose(
-        result, empty_opencv_image
+        result, empty_cv2_image
     ), "Conversion to OpenCV image expected not to change the content of image"
 
 
@@ -119,10 +117,10 @@ def test_images_to_cv2_when_empty_input_provided() -> None:
 
 
 def test_images_to_cv2_when_only_cv2_images_provided(
-    empty_opencv_image: np.ndarray,
+    empty_cv2_image: np.ndarray,
 ) -> None:
     # given
-    images = [empty_opencv_image] * 5
+    images = [empty_cv2_image] * 5
 
     # when
     result = images_to_cv2(images=images)
@@ -131,13 +129,13 @@ def test_images_to_cv2_when_only_cv2_images_provided(
     assert len(result) == 5, "Expected the same number of output element as input ones"
     for result_element in result:
         assert (
-            result_element is empty_opencv_image
+            result_element is empty_cv2_image
         ), "Expected CV images not to be touched by conversion"
 
 
 def test_images_to_cv2_when_only_pillow_images_provided(
     empty_pillow_image: Image.Image,
-    empty_opencv_image: np.ndarray,
+    empty_cv2_image: np.ndarray,
 ) -> None:
     # given
     images = [empty_pillow_image] * 5
@@ -149,16 +147,16 @@ def test_images_to_cv2_when_only_pillow_images_provided(
     assert len(result) == 5, "Expected the same number of output element as input ones"
     for result_element in result:
         assert np.allclose(
-            result_element, empty_opencv_image
+            result_element, empty_cv2_image
         ), "Output images expected to be equal to empty OpenCV image"
 
 
 def test_images_to_cv2_when_mixed_input_provided(
     empty_pillow_image: Image.Image,
-    empty_opencv_image: np.ndarray,
+    empty_cv2_image: np.ndarray,
 ) -> None:
     # given
-    images = [empty_pillow_image, empty_opencv_image]
+    images = [empty_pillow_image, empty_cv2_image]
 
     # when
     result = images_to_cv2(images=images)
 
     # then
     assert len(result) == 2, "Expected the same number of output element as input ones"
     assert np.allclose(
-        result[0], empty_opencv_image
+        result[0], empty_cv2_image
     ), "PIL image should be converted to OpenCV one, equal to example empty image"
     assert (
-        result[1] is empty_opencv_image
+        result[1] is empty_cv2_image
     ), "Expected CV images not to be touched by conversion"
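Note: taken together, the patch changes `annotate` from copy-and-return to mutate-in-place for both input types. A short sketch of the resulting behavior for a PIL input, using `BoundingBoxAnnotator` as named in the deprecation notice above (an illustration, not part of the patch):

```python
import numpy as np
import supervision as sv
from PIL import Image

image = Image.new("RGB", (128, 128))
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 60.0, 60.0]]),
    class_id=np.array([0]),
)

annotated = sv.BoundingBoxAnnotator().annotate(scene=image, detections=detections)

# the wrapper pastes the annotated pixels back into the original PIL image
# and returns that same object, so the input is mutated in place
assert annotated is image
```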