From 69c083eaeccc9eacac2258bcc60dbc4a4ca0f095 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 11:36:46 +0100 Subject: [PATCH 01/17] Add basic benchmark script --- benchmark_transforms.py | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 benchmark_transforms.py diff --git a/benchmark_transforms.py b/benchmark_transforms.py new file mode 100644 index 00000000000..3bc1222938b --- /dev/null +++ b/benchmark_transforms.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +""" +Benchmark script for torchvision transforms performance. + +This script benchmarks the performance of torchvision.transforms.v2 transforms +in various configurations and will be extended to compare against other libraries +like OpenCV. + +The pipeline tested: uint8 image -> resize -> normalize (to [0,1] float) +""" + +import argparse +import torch +import random +from time import perf_counter_ns +from typing import Callable, List, Tuple, Dict, Any +import torchvision.transforms.v2.functional as F + + +def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: + """ + Benchmark function execution time with fresh data for each experiment. + + Args: + f: Function to benchmark + data_generator: Callable that returns fresh data for each experiment + num_exp: Number of experiments to run + warmup: Number of warmup runs + + Returns: + Tensor of execution times in nanoseconds + """ + for _ in range(warmup): + data = data_generator() + f(data) + + times = [] + for _ in range(num_exp): + data = data_generator() + start = perf_counter_ns() + result = f(data) + end = perf_counter_ns() + times.append(end - start) + del result + + return torch.tensor(times, dtype=torch.float32) + + +def report_stats(times: torch.Tensor, unit: str) -> float: + mul = { + "ns": 1, + "µs": 1e-3, + "ms": 1e-6, + "s": 1e-9, + }[unit] + + times = times * mul + std = times.std().item() + med = times.median().item() + mean = times.mean().item() + min_time = times.min().item() + max_time = times.max().item() + + print(f" Median: {med:.2f}{unit} ± {std:.2f}{unit}") + print(f" Mean: {mean:.2f}{unit}, Min: {min_time:.2f}{unit}, Max: {max_time:.2f}{unit}") + + return med + + +def transform_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: + images = F.resize(images, size=target_size, antialias=True) + images = F.to_dtype(images, dtype=torch.float32, scale=True) + images = F.normalize(images, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + return images + + +def run_benchmark(args) -> Dict[str, float]: + memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format + print(f"\n=== TorchVision Transform Benchmark ===") + print(f"Threads: {args.num_threads}, Batch size: {args.batch_size}") + print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") + + torch.set_num_threads(args.num_threads) + + def generate_test_images(): + height = random.randint(args.min_size, args.max_size) + width = random.randint(args.min_size, args.max_size) + + images = torch.randint(0, 256, (args.batch_size, 3, height, width), dtype=torch.uint8) + + if memory_format == torch.channels_last: + images = images.to(memory_format=torch.channels_last) + + return images + + times = bench( + lambda images: transform_pipeline(images, args.target_size), + generate_test_images, + args.num_exp, + args.warmup, + ) + + median_time = report_stats(times, "ms") + + return {"median_time_ms": median_time} + + +def main(): 
+ """Main benchmark runner.""" + parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") + parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") + parser.add_argument("--warmup", type=int, default=10, help="Number of warmup runs before running the num-exp experiments") + parser.add_argument("--target-size", type=int, default=224, help="Resize target size") + parser.add_argument("--min-size", type=int, default=128, help="Minimum input image size for random generation") + parser.add_argument("--max-size", type=int, default=512, help="Maximum input image size for random generation") + parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") + parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 1 means single image processing without a batch dimension") + parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. contiguous)") + + args = parser.parse_args() + + try: + result = run_benchmark(args) + except Exception as e: + print(f"ERROR: {e}") + + +if __name__ == "__main__": + main() From 6b8c1ec6f08499f4a88b8f634a3b9c8277ff3785 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 11:40:16 +0100 Subject: [PATCH 02/17] Nits --- benchmark_transforms.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 3bc1222938b..26335ce68b1 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -67,7 +67,7 @@ def report_stats(times: torch.Tensor, unit: str) -> float: return med -def transform_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: +def inference_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: images = F.resize(images, size=target_size, antialias=True) images = F.to_dtype(images, dtype=torch.float32, scale=True) images = F.normalize(images, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) @@ -94,15 +94,13 @@ def generate_test_images(): return images times = bench( - lambda images: transform_pipeline(images, args.target_size), - generate_test_images, - args.num_exp, - args.warmup, + lambda images: inference_pipeline(images, args.target_size), + data_generator=generate_test_images, + num_exp=args.num_exp, + warmup=args.warmup, ) - median_time = report_stats(times, "ms") - - return {"median_time_ms": median_time} + report_stats(times, "ms") def main(): @@ -118,6 +116,8 @@ def main(): parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. 
contiguous)") args = parser.parse_args() + + print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") try: result = run_benchmark(args) From 17775119cf6675ea2e645a04a01d51342f601e5e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 12:51:01 +0100 Subject: [PATCH 03/17] Add opencv comparison --- benchmark_transforms.py | 69 ++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 26335ce68b1..15502548fdd 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -15,6 +15,13 @@ from time import perf_counter_ns from typing import Callable, List, Tuple, Dict, Any import torchvision.transforms.v2.functional as F +import numpy as np + +try: + import cv2 + HAS_OPENCV = True +except ImportError: + HAS_OPENCV = False def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: @@ -67,39 +74,66 @@ def report_stats(times: torch.Tensor, unit: str) -> float: return med -def inference_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: - images = F.resize(images, size=target_size, antialias=True) +def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: + images = F.resize(images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True) images = F.to_dtype(images, dtype=torch.float32, scale=True) images = F.normalize(images, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) return images +def opencv_pipeline(images: np.ndarray, target_size: int) -> np.ndarray: + img = cv2.resize(images, (target_size, target_size), interpolation=cv2.INTER_LINEAR) + img = img.astype(np.float32) / 255.0 + img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225]) + return img + + def run_benchmark(args) -> Dict[str, float]: - memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format - print(f"\n=== TorchVision Transform Benchmark ===") + if args.backend == "opencv" and not HAS_OPENCV: + raise RuntimeError("OpenCV not available. 
Install with: pip install opencv-python") + + backend_name = args.backend.upper() + print(f"\n=== {backend_name} ===") print(f"Threads: {args.num_threads}, Batch size: {args.batch_size}") - print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") - torch.set_num_threads(args.num_threads) + memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format + print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") + + if args.backend == "torchvision": + torch.set_num_threads(args.num_threads) + pipeline = torchvision_pipeline + elif args.backend == "opencv": + cv2.setNumThreads(args.num_threads) + pipeline = opencv_pipeline + def generate_test_images(): height = random.randint(args.min_size, args.max_size) width = random.randint(args.min_size, args.max_size) - images = torch.randint(0, 256, (args.batch_size, 3, height, width), dtype=torch.uint8) - + + memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format if memory_format == torch.channels_last: images = images.to(memory_format=torch.channels_last) + + if args.batch_size == 1: + images = images[0] + + if args.backend == "opencv": + if args.batch_size > 1: + raise ValueError("Batches not supported in OpenCV pipeline (yet??)") + # TODO double check that contiguity requirement is respected for numpy array + images = images.transpose(2, 0).numpy() return images - + times = bench( - lambda images: inference_pipeline(images, args.target_size), + lambda images: pipeline(images, args.target_size), data_generator=generate_test_images, num_exp=args.num_exp, warmup=args.warmup, ) - + report_stats(times, "ms") @@ -114,15 +148,20 @@ def main(): parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 1 means single image processing without a batch dimension") parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. 
contiguous)") + all_backends = ["torchvision", "opencv"] + parser.add_argument("--backend", choices=all_backends + ["all"], default="all", help="Backend to use for transforms") args = parser.parse_args() print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") - try: - result = run_benchmark(args) - except Exception as e: - print(f"ERROR: {e}") + backends_to_run = all_backends if args.backend == "all" else args.backend + for backend in backends_to_run: + args.backend = backend + try: + result = run_benchmark(args) + except Exception as e: + print(f"ERROR with {backend}: {e}") if __name__ == "__main__": From 3713461740bbd9ed6e7f207ad36e53914edb34e3 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 12:59:01 +0100 Subject: [PATCH 04/17] Add summary table --- benchmark_transforms.py | 60 +++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 15502548fdd..7087f14f98b 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -23,6 +23,8 @@ except ImportError: HAS_OPENCV = False +from tabulate import tabulate + def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: """ @@ -53,7 +55,7 @@ def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> t return torch.tensor(times, dtype=torch.float32) -def report_stats(times: torch.Tensor, unit: str) -> float: +def compute_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: mul = { "ns": 1, "µs": 1e-3, @@ -62,16 +64,19 @@ def report_stats(times: torch.Tensor, unit: str) -> float: }[unit] times = times * mul - std = times.std().item() - med = times.median().item() - mean = times.mean().item() - min_time = times.min().item() - max_time = times.max().item() - - print(f" Median: {med:.2f}{unit} ± {std:.2f}{unit}") - print(f" Mean: {mean:.2f}{unit}, Min: {min_time:.2f}{unit}, Max: {max_time:.2f}{unit}") + return { + "std": times.std().item(), + "median": times.median().item(), + "mean": times.mean().item(), + "min": times.min().item(), + "max": times.max().item(), + } - return med +def report_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: + stats = compute_stats(times, unit) + print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") + print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") + return stats def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: @@ -88,7 +93,7 @@ def opencv_pipeline(images: np.ndarray, target_size: int) -> np.ndarray: return img -def run_benchmark(args) -> Dict[str, float]: +def run_benchmark(args) -> Dict[str, Any]: if args.backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. 
Install with: pip install opencv-python") @@ -134,11 +139,32 @@ def generate_test_images(): warmup=args.warmup, ) - report_stats(times, "ms") + stats = report_stats(times, "ms") + return {"backend": args.backend, "stats": stats} + + +def print_comparison_table(results: List[Dict[str, Any]]) -> None: + torchvision_median = next((r["stats"]["median"] for r in results if r["backend"] == "torchvision"), None) + + table_data = [] + for result in results: + stats = result["stats"] + relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A" + + table_data.append({ + "Backend": result["backend"], + "Median (ms)": f"{stats['median']:.2f}", + "Std (ms)": f"{stats['std']:.2f}", + "Mean (ms)": f"{stats['mean']:.2f}", + "Min (ms)": f"{stats['min']:.2f}", + "Max (ms)": f"{stats['max']:.2f}", + "Relative": relative + }) + + print(tabulate(table_data, headers="keys", tablefmt="grid")) def main(): - """Main benchmark runner.""" parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") parser.add_argument("--warmup", type=int, default=10, help="Number of warmup runs before running the num-exp experiments") @@ -155,13 +181,19 @@ def main(): print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") - backends_to_run = all_backends if args.backend == "all" else args.backend + backends_to_run = all_backends if args.backend == "all" else [args.backend] + results = [] + for backend in backends_to_run: args.backend = backend try: result = run_benchmark(args) + results.append(result) except Exception as e: print(f"ERROR with {backend}: {e}") + + if len(results) > 1: + print_comparison_table(results) if __name__ == "__main__": From 436be3f3f82b7c18ed3c8f68bd08643ba0ad6194 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:01:20 +0100 Subject: [PATCH 05/17] Add conversion to torch at the end of opencv pipeline --- benchmark_transforms.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 7087f14f98b..8082240a3e0 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -86,11 +86,12 @@ def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor return images -def opencv_pipeline(images: np.ndarray, target_size: int) -> np.ndarray: +def opencv_pipeline(images: np.ndarray, target_size: int) -> torch.Tensor: img = cv2.resize(images, (target_size, target_size), interpolation=cv2.INTER_LINEAR) img = img.astype(np.float32) / 255.0 img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225]) - return img + img = img.transpose(2, 0, 1) # HWC -> CHW + return torch.from_numpy(img) def run_benchmark(args) -> Dict[str, Any]: From a18adc522c308a00bda01b79af44125ef64faf92 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:07:18 +0100 Subject: [PATCH 06/17] Add PIL backend --- benchmark_transforms.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 8082240a3e0..b611249cdb0 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -23,8 +23,13 @@ except ImportError: HAS_OPENCV = False +from PIL import Image from tabulate import tabulate +# ImageNet normalization constants +NORM_MEAN = [0.485, 0.456, 0.406] +NORM_STD = [0.229, 0.224, 0.225] + def bench(f: Callable, data_generator: 
Callable, num_exp: int, warmup: int) -> torch.Tensor: """ @@ -82,18 +87,26 @@ def report_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: images = F.resize(images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True) images = F.to_dtype(images, dtype=torch.float32, scale=True) - images = F.normalize(images, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + images = F.normalize(images, mean=NORM_MEAN, std=NORM_STD) return images def opencv_pipeline(images: np.ndarray, target_size: int) -> torch.Tensor: img = cv2.resize(images, (target_size, target_size), interpolation=cv2.INTER_LINEAR) img = img.astype(np.float32) / 255.0 - img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225]) + img = (img - np.array(NORM_MEAN)) / np.array(NORM_STD) img = img.transpose(2, 0, 1) # HWC -> CHW return torch.from_numpy(img) +def pil_pipeline(images: Image.Image, target_size: int) -> torch.Tensor: + img = images.resize((target_size, target_size), Image.BILINEAR) + img = F.pil_to_tensor(img) + img = F.to_dtype(img, dtype=torch.float32, scale=True) + img = F.normalize(img, mean=NORM_MEAN, std=NORM_STD) + return img + + def run_benchmark(args) -> Dict[str, Any]: if args.backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. Install with: pip install opencv-python") @@ -111,6 +124,9 @@ def run_benchmark(args) -> Dict[str, Any]: elif args.backend == "opencv": cv2.setNumThreads(args.num_threads) pipeline = opencv_pipeline + elif args.backend == "pil": + torch.set_num_threads(args.num_threads) + pipeline = pil_pipeline def generate_test_images(): @@ -127,9 +143,15 @@ def generate_test_images(): if args.backend == "opencv": if args.batch_size > 1: - raise ValueError("Batches not supported in OpenCV pipeline (yet??)") + raise ValueError("Batches not supported in OpenCV pipeline") # TODO double check that contiguity requirement is respected for numpy array - images = images.transpose(2, 0).numpy() + images = images.numpy().transpose(1, 2, 0) + elif args.backend == "pil": + if args.batch_size > 1: + raise ValueError("Batches not supported in PIL pipeline") + # Convert to PIL Image (CHW -> HWC) + images = images.numpy().transpose(1, 2, 0) + images = Image.fromarray(images) return images @@ -175,7 +197,7 @@ def main(): parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 1 means single image processing without a batch dimension") parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. 
contiguous)") - all_backends = ["torchvision", "opencv"] + all_backends = ["torchvision", "opencv", "pil"] parser.add_argument("--backend", choices=all_backends + ["all"], default="all", help="Backend to use for transforms") args = parser.parse_args() From 5d1b0992f36b462328fb51b5019c940f9eaf8e42 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:27:49 +0100 Subject: [PATCH 07/17] cosmetics --- benchmark_transforms.py | 66 ++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index b611249cdb0..56c978cf7a1 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -60,7 +60,7 @@ def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> t return torch.tensor(times, dtype=torch.float32) -def compute_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: +def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[str, float]: mul = { "ns": 1, "µs": 1e-3, @@ -69,18 +69,18 @@ def compute_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: }[unit] times = times * mul - return { + stats = { "std": times.std().item(), "median": times.median().item(), "mean": times.mean().item(), "min": times.min().item(), "max": times.max().item(), } - -def report_stats(times: torch.Tensor, unit: str) -> Dict[str, float]: - stats = compute_stats(times, unit) - print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") - print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") + + if verbose: + print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") + print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") + return stats @@ -91,43 +91,52 @@ def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor return images -def opencv_pipeline(images: np.ndarray, target_size: int) -> torch.Tensor: - img = cv2.resize(images, (target_size, target_size), interpolation=cv2.INTER_LINEAR) +def opencv_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor: + img = cv2.resize(image, (target_size, target_size), interpolation=cv2.INTER_LINEAR) # no antialias in OpenCV img = img.astype(np.float32) / 255.0 img = (img - np.array(NORM_MEAN)) / np.array(NORM_STD) img = img.transpose(2, 0, 1) # HWC -> CHW return torch.from_numpy(img) -def pil_pipeline(images: Image.Image, target_size: int) -> torch.Tensor: - img = images.resize((target_size, target_size), Image.BILINEAR) +def pil_pipeline(image: Image.Image, target_size: int) -> torch.Tensor: + img = image.resize((target_size, target_size), Image.BILINEAR) # PIL forces antialias img = F.pil_to_tensor(img) img = F.to_dtype(img, dtype=torch.float32, scale=True) img = F.normalize(img, mean=NORM_MEAN, std=NORM_STD) return img +# TODO double check that this works as expected: no graph break, and no issues with dynamic shapes +compiled_torchvision_pipeline = torch.compile(torchvision_pipeline, mode="default", fullgraph=True, dynamic=True) + + def run_benchmark(args) -> Dict[str, Any]: - if args.backend == "opencv" and not HAS_OPENCV: + backend = args.backend.lower() + + if backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. 
Install with: pip install opencv-python") - backend_name = args.backend.upper() - print(f"\n=== {backend_name} ===") - print(f"Threads: {args.num_threads}, Batch size: {args.batch_size}") + if args.verbose: + backend_display = args.backend.upper() + print(f"\n=== {backend_display} ===") + print(f"Threads: {args.num_threads}, Batch size: {args.batch_size}") - memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format - print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") + memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format + print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") - if args.backend == "torchvision": + if backend == "tv": torch.set_num_threads(args.num_threads) pipeline = torchvision_pipeline - elif args.backend == "opencv": + elif backend == "tv-compiled": + torch.set_num_threads(args.num_threads) + pipeline = compiled_torchvision_pipeline + elif backend == "opencv": cv2.setNumThreads(args.num_threads) pipeline = opencv_pipeline - elif args.backend == "pil": + elif backend == "pil": torch.set_num_threads(args.num_threads) pipeline = pil_pipeline - def generate_test_images(): height = random.randint(args.min_size, args.max_size) @@ -141,12 +150,12 @@ def generate_test_images(): if args.batch_size == 1: images = images[0] - if args.backend == "opencv": + if backend == "opencv": if args.batch_size > 1: raise ValueError("Batches not supported in OpenCV pipeline") # TODO double check that contiguity requirement is respected for numpy array images = images.numpy().transpose(1, 2, 0) - elif args.backend == "pil": + elif backend == "pil": if args.batch_size > 1: raise ValueError("Batches not supported in PIL pipeline") # Convert to PIL Image (CHW -> HWC) @@ -162,12 +171,12 @@ def generate_test_images(): warmup=args.warmup, ) - stats = report_stats(times, "ms") + stats = report_stats(times, "ms", args.verbose) return {"backend": args.backend, "stats": stats} def print_comparison_table(results: List[Dict[str, Any]]) -> None: - torchvision_median = next((r["stats"]["median"] for r in results if r["backend"] == "torchvision"), None) + torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None) table_data = [] for result in results: @@ -197,14 +206,15 @@ def main(): parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 1 means single image processing without a batch dimension") parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. 
contiguous)") - all_backends = ["torchvision", "opencv", "pil"] - parser.add_argument("--backend", choices=all_backends + ["all"], default="all", help="Backend to use for transforms") + all_backends = ["tv", "tv-compiled", "opencv", "pil"] + parser.add_argument("--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to use for transforms") + parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") args = parser.parse_args() print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") - backends_to_run = all_backends if args.backend == "all" else [args.backend] + backends_to_run = all_backends if args.backend.lower() == "all" else [args.backend] results = [] for backend in backends_to_run: From 9a10b0782e2ea87eab9a3db9682310765c765457 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:31:13 +0100 Subject: [PATCH 08/17] Add albumentation backend --- benchmark_transforms.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 56c978cf7a1..69ae47ba347 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -23,6 +23,12 @@ except ImportError: HAS_OPENCV = False +try: + import albumentations as A + HAS_ALBUMENTATIONS = True +except ImportError: + HAS_ALBUMENTATIONS = False + from PIL import Image from tabulate import tabulate @@ -107,6 +113,16 @@ def pil_pipeline(image: Image.Image, target_size: int) -> torch.Tensor: return img +def albumentations_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor: + transform = A.Compose([ + A.Resize(target_size, target_size, interpolation=cv2.INTER_LINEAR), + A.Normalize(mean=NORM_MEAN, std=NORM_STD, max_pixel_value=255.0) + ]) + img = transform(image=image)["image"] + img = torch.from_numpy(img).permute(2, 0, 1) + return img + + # TODO double check that this works as expected: no graph break, and no issues with dynamic shapes compiled_torchvision_pipeline = torch.compile(torchvision_pipeline, mode="default", fullgraph=True, dynamic=True) @@ -116,6 +132,8 @@ def run_benchmark(args) -> Dict[str, Any]: if backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. Install with: pip install opencv-python") + if backend == "albumentations" and not HAS_ALBUMENTATIONS: + raise RuntimeError("Albumentations not available. Install with: pip install albumentations") if args.verbose: backend_display = args.backend.upper() @@ -137,6 +155,9 @@ def run_benchmark(args) -> Dict[str, Any]: elif backend == "pil": torch.set_num_threads(args.num_threads) pipeline = pil_pipeline + elif backend == "albumentations": + cv2.setNumThreads(args.num_threads) + pipeline = albumentations_pipeline def generate_test_images(): height = random.randint(args.min_size, args.max_size) @@ -161,6 +182,11 @@ def generate_test_images(): # Convert to PIL Image (CHW -> HWC) images = images.numpy().transpose(1, 2, 0) images = Image.fromarray(images) + elif backend == "albumentations": + if args.batch_size > 1: + # TODO is that true???? + raise ValueError("Batches not supported in Albumentations pipeline") + images = images.numpy().transpose(1, 2, 0) return images @@ -206,7 +232,7 @@ def main(): parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 
1 means single image processing without a batch dimension") parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. contiguous)") - all_backends = ["tv", "tv-compiled", "opencv", "pil"] + all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations"] parser.add_argument("--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to use for transforms") parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") From f07ae8ecc3613d72e851b07eced80f15979719b0 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:41:31 +0100 Subject: [PATCH 09/17] Added kornia backend --- benchmark_transforms.py | 129 ++++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 38 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index 69ae47ba347..add98f9301b 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -19,16 +19,26 @@ try: import cv2 + HAS_OPENCV = True except ImportError: HAS_OPENCV = False try: import albumentations as A + HAS_ALBUMENTATIONS = True except ImportError: HAS_ALBUMENTATIONS = False +try: + import kornia as K + import kornia.augmentation as KA + + HAS_KORNIA = True +except ImportError: + HAS_KORNIA = False + from PIL import Image from tabulate import tabulate @@ -82,16 +92,18 @@ def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[s "min": times.min().item(), "max": times.max().item(), } - + if verbose: print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") - + return stats def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: - images = F.resize(images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True) + images = F.resize( + images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True + ) images = F.to_dtype(images, dtype=torch.float32, scale=True) images = F.normalize(images, mean=NORM_MEAN, std=NORM_STD) return images @@ -114,27 +126,44 @@ def pil_pipeline(image: Image.Image, target_size: int) -> torch.Tensor: def albumentations_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor: - transform = A.Compose([ - A.Resize(target_size, target_size, interpolation=cv2.INTER_LINEAR), - A.Normalize(mean=NORM_MEAN, std=NORM_STD, max_pixel_value=255.0) - ]) + transform = A.Compose( + [ + A.Resize(target_size, target_size, interpolation=cv2.INTER_LINEAR), + A.Normalize(mean=NORM_MEAN, std=NORM_STD, max_pixel_value=255.0), + ] + ) img = transform(image=image)["image"] img = torch.from_numpy(img).permute(2, 0, 1) return img +def kornia_pipeline(image: torch.Tensor, target_size: int) -> torch.Tensor: + # Kornia expects float tensors in [0, 1] range + # TODO check that this is needed? 
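+    # (Likely yes: Kornia's bilinear resize goes through torch.nn.functional.interpolate, which rejects uint8 input, and normalize expects float tensors as well.)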
+ img = image.float() / 255.0 + img = img.unsqueeze(0) # Add batch dimension for kornia + + img = K.geometry.transform.resize(img, (target_size, target_size), interpolation="bilinear") + + img = K.enhance.normalize(img, mean=torch.tensor(NORM_MEAN), std=torch.tensor(NORM_STD)) + + return img.squeeze(0) # Remove batch dimension + + # TODO double check that this works as expected: no graph break, and no issues with dynamic shapes compiled_torchvision_pipeline = torch.compile(torchvision_pipeline, mode="default", fullgraph=True, dynamic=True) def run_benchmark(args) -> Dict[str, Any]: backend = args.backend.lower() - + if backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. Install with: pip install opencv-python") if backend == "albumentations" and not HAS_ALBUMENTATIONS: raise RuntimeError("Albumentations not available. Install with: pip install albumentations") - + if backend == "kornia" and not HAS_KORNIA: + raise RuntimeError("Kornia not available. Install with: pip install kornia") + if args.verbose: backend_display = args.backend.upper() print(f"\n=== {backend_display} ===") @@ -142,7 +171,7 @@ def run_benchmark(args) -> Dict[str, Any]: memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") - + if backend == "tv": torch.set_num_threads(args.num_threads) pipeline = torchvision_pipeline @@ -158,19 +187,22 @@ def run_benchmark(args) -> Dict[str, Any]: elif backend == "albumentations": cv2.setNumThreads(args.num_threads) pipeline = albumentations_pipeline - + elif backend == "kornia": + torch.set_num_threads(args.num_threads) + pipeline = kornia_pipeline + def generate_test_images(): height = random.randint(args.min_size, args.max_size) width = random.randint(args.min_size, args.max_size) images = torch.randint(0, 256, (args.batch_size, 3, height, width), dtype=torch.uint8) - + memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format if memory_format == torch.channels_last: images = images.to(memory_format=torch.channels_last) - + if args.batch_size == 1: images = images[0] - + if backend == "opencv": if args.batch_size > 1: raise ValueError("Batches not supported in OpenCV pipeline") @@ -187,62 +219,83 @@ def generate_test_images(): # TODO is that true???? raise ValueError("Batches not supported in Albumentations pipeline") images = images.numpy().transpose(1, 2, 0) + elif backend == "kornia": + if args.batch_size > 1: + # TODO is that true???? 
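+                # (Strictly, Kornia ops are batch-native and accept NCHW tensors; the restriction here is this script's single-image plumbing, not Kornia itself.)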
+ raise ValueError("Batches not supported in Kornia pipeline") return images - + times = bench( lambda images: pipeline(images, args.target_size), data_generator=generate_test_images, num_exp=args.num_exp, warmup=args.warmup, ) - + stats = report_stats(times, "ms", args.verbose) return {"backend": args.backend, "stats": stats} def print_comparison_table(results: List[Dict[str, Any]]) -> None: torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None) - + table_data = [] for result in results: stats = result["stats"] relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A" - - table_data.append({ - "Backend": result["backend"], - "Median (ms)": f"{stats['median']:.2f}", - "Std (ms)": f"{stats['std']:.2f}", - "Mean (ms)": f"{stats['mean']:.2f}", - "Min (ms)": f"{stats['min']:.2f}", - "Max (ms)": f"{stats['max']:.2f}", - "Relative": relative - }) - + + table_data.append( + { + "Backend": result["backend"], + "Median (ms)": f"{stats['median']:.2f}", + "Std (ms)": f"{stats['std']:.2f}", + "Mean (ms)": f"{stats['mean']:.2f}", + "Min (ms)": f"{stats['min']:.2f}", + "Max (ms)": f"{stats['max']:.2f}", + "Relative": relative, + } + ) + print(tabulate(table_data, headers="keys", tablefmt="grid")) def main(): parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") - parser.add_argument("--warmup", type=int, default=10, help="Number of warmup runs before running the num-exp experiments") - parser.add_argument("--target-size", type=int, default=224, help="Resize target size") + parser.add_argument( + "--warmup", type=int, default=10, help="Number of warmup runs before running the num-exp experiments" + ) + parser.add_argument( + "--target-size", type=int, default=224, help="size parameter of the Resize step, for both H and W." + ) parser.add_argument("--min-size", type=int, default=128, help="Minimum input image size for random generation") parser.add_argument("--max-size", type=int, default=512, help="Maximum input image size for random generation") - parser.add_argument("--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads()") - parser.add_argument("--batch-size", type=int, default=1, help="Batch size. 1 means single image processing without a batch dimension") - parser.add_argument("--contiguity", choices=["CL", "CF"], default="CF", help="Memory format: CL (channels_last) or CF (channels_first, i.e. contiguous)") - all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations"] - parser.add_argument("--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to use for transforms") + parser.add_argument( + "--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads() & Co" + ) + parser.add_argument( + "--batch-size", type=int, default=1, help="Batch size. 1 means single 3D image without a batch dimension" + ) + parser.add_argument( + "--contiguity", + choices=["CL", "CF"], + default="CF", + help="Memory format: CL (channels_last) or CF (channels_first, i.e. 
contiguous)", + ) + all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations", "kornia"] + parser.add_argument( + "--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to benchmark" + ) parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") args = parser.parse_args() - + print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") backends_to_run = all_backends if args.backend.lower() == "all" else [args.backend] results = [] - + for backend in backends_to_run: args.backend = backend try: @@ -250,7 +303,7 @@ def main(): results.append(result) except Exception as e: print(f"ERROR with {backend}: {e}") - + if len(results) > 1: print_comparison_table(results) From 4d1ba4dfdfc5088e0b8abeb1902fa40c2d890e65 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 13:53:06 +0100 Subject: [PATCH 10/17] Silence torch.compile warnings --- benchmark_transforms.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index add98f9301b..e3f0c37df15 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -12,11 +12,20 @@ import argparse import torch import random +import warnings from time import perf_counter_ns from typing import Callable, List, Tuple, Dict, Any import torchvision.transforms.v2.functional as F import numpy as np +# Filter out the specific TF32 warning +warnings.filterwarnings( + "ignore", + message="Please use the new API settings to control TF32 behavior.*", + category=UserWarning, + module="torch.backends.cuda", +) + try: import cv2 From f13f6f100cfa86f964125b6e9bb8ea5dd592ebf5 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 14:12:52 +0100 Subject: [PATCH 11/17] Add CUDA, didn't test. Add benchmark info --- benchmark_transforms.py | 71 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/benchmark_transforms.py b/benchmark_transforms.py index e3f0c37df15..f72dc19a73c 100644 --- a/benchmark_transforms.py +++ b/benchmark_transforms.py @@ -50,6 +50,7 @@ from PIL import Image from tabulate import tabulate +import torchvision # ImageNet normalization constants NORM_MEAN = [0.485, 0.456, 0.406] @@ -165,6 +166,14 @@ def kornia_pipeline(image: torch.Tensor, target_size: int) -> torch.Tensor: def run_benchmark(args) -> Dict[str, Any]: backend = args.backend.lower() + + device = args.device.lower() + # Check device compatibility + if device == 'cuda' and backend not in ['tv', 'tv-compiled']: + raise RuntimeError(f"CUDA device not supported for {backend} backend. Only 'tv' and 'tv-compiled' support CUDA.") + + if device == 'cuda' and not torch.cuda.is_available(): + raise RuntimeError("CUDA not available. Install cuda-enabled torch and torchvision, or use 'cpu' device.") if backend == "opencv" and not HAS_OPENCV: raise RuntimeError("OpenCV not available. 
Install with: pip install opencv-python") @@ -176,7 +185,7 @@ def run_benchmark(args) -> Dict[str, Any]: if args.verbose: backend_display = args.backend.upper() print(f"\n=== {backend_display} ===") - print(f"Threads: {args.num_threads}, Batch size: {args.batch_size}") + print(f"Device: {device}, Threads: {args.num_threads}, Batch size: {args.batch_size}") memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}") @@ -208,6 +217,10 @@ def generate_test_images(): memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format if memory_format == torch.channels_last: images = images.to(memory_format=torch.channels_last) + + # Move to device for torchvision backends + if backend in ['tv', 'tv-compiled']: + images = images.to(device) if args.batch_size == 1: images = images[0] @@ -269,6 +282,59 @@ def print_comparison_table(results: List[Dict[str, Any]]) -> None: print(tabulate(table_data, headers="keys", tablefmt="grid")) +def print_benchmark_info(args): + """Print benchmark configuration and library versions.""" + device = args.device.lower() + if device in ['gpu', 'cuda']: + device = 'cuda' + else: + device = 'cpu' + + memory_format = 'channels_last' if args.contiguity == 'CL' else 'channels_first' + + config = [ + ["Device", device], + ["Threads", args.num_threads], + ["Batch size", args.batch_size], + ["Memory format", memory_format], + ["Experiments", f"{args.num_exp} (+ {args.warmup} warmup)"], + ["Input → output size", f"{args.min_size}-{args.max_size} → {args.target_size}×{args.target_size}"], + ] + + print(tabulate(config, headers=["Parameter", "Value"], tablefmt="simple")) + print() + + versions = [ + ["PyTorch", torch.__version__], + ["TorchVision", torchvision.__version__], + ] + + if HAS_OPENCV: + versions.append(["OpenCV", cv2.__version__]) + else: + versions.append(["OpenCV", "Not available"]) + + try: + versions.append(["PIL/Pillow", Image.__version__]) + except AttributeError: + versions.append(["PIL/Pillow", "Version unavailable"]) + + if HAS_ALBUMENTATIONS: + versions.append(["Albumentations", A.__version__]) + else: + versions.append(["Albumentations", "Not available"]) + + if HAS_KORNIA: + versions.append(["Kornia", K.__version__]) + else: + versions.append(["Kornia", "Not available"]) + + print(tabulate(versions, headers=["Library", "Version"], tablefmt="simple")) + + print("=" * 80) + print() + + def main(): parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") @@ -297,10 +363,11 @@ def main(): "--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to benchmark" ) parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") + parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu, cuda, or gpu (default: cpu)") args = parser.parse_args() - print(f"Averaging over {args.num_exp} runs, {args.warmup} warmup runs") + print_benchmark_info(args) backends_to_run = all_backends if args.backend.lower() == "all" else [args.backend] results = [] From fd12d6a5ffe88be2cf508eb27029fad41f7c2f58 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Aug 2025 14:26:58 +0100 Subject: [PATCH 12/17] move into benchmarks/ folder --- .../benchmark_transforms.py | 134 +------------- benchmarks/utils.py | 168 
++++++++++++++++++ 2 files changed, 170 insertions(+), 132 deletions(-) rename benchmark_transforms.py => benchmarks/benchmark_transforms.py (70%) create mode 100644 benchmarks/utils.py diff --git a/benchmark_transforms.py b/benchmarks/benchmark_transforms.py similarity index 70% rename from benchmark_transforms.py rename to benchmarks/benchmark_transforms.py index f72dc19a73c..e65b7819509 100644 --- a/benchmark_transforms.py +++ b/benchmarks/benchmark_transforms.py @@ -13,10 +13,10 @@ import torch import random import warnings -from time import perf_counter_ns -from typing import Callable, List, Tuple, Dict, Any +from typing import Dict, Any import torchvision.transforms.v2.functional as F import numpy as np +from utils import bench, report_stats, print_comparison_table, print_benchmark_info # Filter out the specific TF32 warning warnings.filterwarnings( @@ -28,14 +28,12 @@ try: import cv2 - HAS_OPENCV = True except ImportError: HAS_OPENCV = False try: import albumentations as A - HAS_ALBUMENTATIONS = True except ImportError: HAS_ALBUMENTATIONS = False @@ -43,71 +41,17 @@ try: import kornia as K import kornia.augmentation as KA - HAS_KORNIA = True except ImportError: HAS_KORNIA = False from PIL import Image -from tabulate import tabulate -import torchvision # ImageNet normalization constants NORM_MEAN = [0.485, 0.456, 0.406] NORM_STD = [0.229, 0.224, 0.225] -def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: - """ - Benchmark function execution time with fresh data for each experiment. - - Args: - f: Function to benchmark - data_generator: Callable that returns fresh data for each experiment - num_exp: Number of experiments to run - warmup: Number of warmup runs - - Returns: - Tensor of execution times in nanoseconds - """ - for _ in range(warmup): - data = data_generator() - f(data) - - times = [] - for _ in range(num_exp): - data = data_generator() - start = perf_counter_ns() - result = f(data) - end = perf_counter_ns() - times.append(end - start) - del result - - return torch.tensor(times, dtype=torch.float32) - - -def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[str, float]: - mul = { - "ns": 1, - "µs": 1e-3, - "ms": 1e-6, - "s": 1e-9, - }[unit] - - times = times * mul - stats = { - "std": times.std().item(), - "median": times.median().item(), - "mean": times.mean().item(), - "min": times.min().item(), - "max": times.max().item(), - } - - if verbose: - print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") - print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") - - return stats def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: @@ -259,80 +203,6 @@ def generate_test_images(): return {"backend": args.backend, "stats": stats} -def print_comparison_table(results: List[Dict[str, Any]]) -> None: - torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None) - - table_data = [] - for result in results: - stats = result["stats"] - relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A" - - table_data.append( - { - "Backend": result["backend"], - "Median (ms)": f"{stats['median']:.2f}", - "Std (ms)": f"{stats['std']:.2f}", - "Mean (ms)": f"{stats['mean']:.2f}", - "Min (ms)": f"{stats['min']:.2f}", - "Max (ms)": f"{stats['max']:.2f}", - "Relative": relative, - } - ) - - print(tabulate(table_data, headers="keys", tablefmt="grid")) - - -def 
print_benchmark_info(args): - """Print benchmark configuration and library versions.""" - device = args.device.lower() - if device in ['gpu', 'cuda']: - device = 'cuda' - else: - device = 'cpu' - - memory_format = 'channels_last' if args.contiguity == 'CL' else 'channels_first' - - config = [ - ["Device", device], - ["Threads", args.num_threads], - ["Batch size", args.batch_size], - ["Memory format", memory_format], - ["Experiments", f"{args.num_exp} (+ {args.warmup} warmup)"], - ["Input → output size", f"{args.min_size}-{args.max_size} → {args.target_size}×{args.target_size}"], - ] - - print(tabulate(config, headers=["Parameter", "Value"], tablefmt="simple")) - print() - - versions = [ - ["PyTorch", torch.__version__], - ["TorchVision", torchvision.__version__], - ] - - if HAS_OPENCV: - versions.append(["OpenCV", cv2.__version__]) - else: - versions.append(["OpenCV", "Not available"]) - - try: - versions.append(["PIL/Pillow", Image.__version__]) - except AttributeError: - versions.append(["PIL/Pillow", "Version unavailable"]) - - if HAS_ALBUMENTATIONS: - versions.append(["Albumentations", A.__version__]) - else: - versions.append(["Albumentations", "Not available"]) - - if HAS_KORNIA: - versions.append(["Kornia", K.__version__]) - else: - versions.append(["Kornia", "Not available"]) - - print(tabulate(versions, headers=["Library", "Version"], tablefmt="simple")) - - print("=" * 80) - print() def main(): diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 00000000000..0fd479d7935 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,168 @@ +""" +Utility functions for benchmarking transforms. +""" + +import torch +import torchvision +from time import perf_counter_ns +from typing import Callable, List, Dict, Any +from tabulate import tabulate + +try: + import cv2 + HAS_OPENCV = True +except ImportError: + HAS_OPENCV = False + +try: + import albumentations as A + HAS_ALBUMENTATIONS = True +except ImportError: + HAS_ALBUMENTATIONS = False + +try: + import kornia as K + HAS_KORNIA = True +except ImportError: + HAS_KORNIA = False + +from PIL import Image + + +def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: + """ + Benchmark function execution time with fresh data for each experiment. 
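+
+    Note: times are host-side wall clock (perf_counter_ns). CUDA kernels launch
+    asynchronously, so GPU timings are only meaningful with explicit synchronization.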
+ + Args: + f: Function to benchmark + data_generator: Callable that returns fresh data for each experiment + num_exp: Number of experiments to run + warmup: Number of warmup runs + + Returns: + Tensor of execution times in nanoseconds + """ + for _ in range(warmup): + data = data_generator() + f(data) + + times = [] + for _ in range(num_exp): + data = data_generator() + start = perf_counter_ns() + result = f(data) + end = perf_counter_ns() + times.append(end - start) + del result + + return torch.tensor(times, dtype=torch.float32) + + +def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[str, float]: + mul = { + "ns": 1, + "µs": 1e-3, + "ms": 1e-6, + "s": 1e-9, + }[unit] + + times = times * mul + stats = { + "std": times.std().item(), + "median": times.median().item(), + "mean": times.mean().item(), + "min": times.min().item(), + "max": times.max().item(), + } + + if verbose: + print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}") + print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}") + + return stats + + +def print_comparison_table(results: List[Dict[str, Any]]) -> None: + torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None) + + table_data = [] + for result in results: + stats = result["stats"] + relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A" + + table_data.append( + { + "Backend": result["backend"], + "Median (ms)": f"{stats['median']:.2f}", + "Std (ms)": f"{stats['std']:.2f}", + "Mean (ms)": f"{stats['mean']:.2f}", + "Min (ms)": f"{stats['min']:.2f}", + "Max (ms)": f"{stats['max']:.2f}", + "Relative": relative, + } + ) + + print(tabulate(table_data, headers="keys", tablefmt="grid")) + + +def print_benchmark_info(args): + """Print benchmark configuration and library versions.""" + device = args.device.lower() + if device in ['gpu', 'cuda']: + device = 'cuda' + else: + device = 'cpu' + + memory_format = 'channels_last' if args.contiguity == 'CL' else 'channels_first' + + print("=" * 80) + print("BENCHMARK CONFIGURATION") + print("=" * 80) + + # Collect configuration info + config = [ + ["Device", device], + ["Threads", args.num_threads], + ["Batch size", args.batch_size], + ["Memory format", memory_format], + ["Experiments", f"{args.num_exp} (+ {args.warmup} warmup)"], + ["Input → output size", f"{args.min_size}-{args.max_size} → {args.target_size}×{args.target_size}"], + ] + + print(tabulate(config, headers=["Parameter", "Value"], tablefmt="simple")) + print() + + print("=" * 80) + print("LIBRARY VERSIONS") + print("=" * 80) + + # Collect library versions + versions = [ + ["PyTorch", torch.__version__], + ["TorchVision", torchvision.__version__], + ] + + if HAS_OPENCV: + versions.append(["OpenCV", cv2.__version__]) + else: + versions.append(["OpenCV", "Not available"]) + + # PIL version + try: + versions.append(["PIL/Pillow", Image.__version__]) + except AttributeError: + versions.append(["PIL/Pillow", "Version unavailable"]) + + if HAS_ALBUMENTATIONS: + versions.append(["Albumentations", A.__version__]) + else: + versions.append(["Albumentations", "Not available"]) + + if HAS_KORNIA: + versions.append(["Kornia", K.__version__]) + else: + versions.append(["Kornia", "Not available"]) + + print(tabulate(versions, headers=["Library", "Version"], tablefmt="simple")) + print("=" * 80) + print() \ No newline at end of file From b88b9065e0671a9380b75af2bbbee8bdd0e1773d Mon Sep 17 00:00:00 2001 From: 
Nicolas Hug Date: Tue, 12 Aug 2025 14:30:33 +0100 Subject: [PATCH 13/17] cosmetics --- benchmarks/benchmark_transforms.py | 25 +++++++------- benchmarks/utils.py | 54 +++++++----------------------- 2 files changed, 26 insertions(+), 53 deletions(-) diff --git a/benchmarks/benchmark_transforms.py b/benchmarks/benchmark_transforms.py index e65b7819509..1511f917d8a 100644 --- a/benchmarks/benchmark_transforms.py +++ b/benchmarks/benchmark_transforms.py @@ -28,12 +28,14 @@ try: import cv2 + HAS_OPENCV = True except ImportError: HAS_OPENCV = False try: import albumentations as A + HAS_ALBUMENTATIONS = True except ImportError: HAS_ALBUMENTATIONS = False @@ -41,6 +43,7 @@ try: import kornia as K import kornia.augmentation as KA + HAS_KORNIA = True except ImportError: HAS_KORNIA = False @@ -52,8 +55,6 @@ NORM_STD = [0.229, 0.224, 0.225] - - def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor: images = F.resize( images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True @@ -110,13 +111,15 @@ def kornia_pipeline(image: torch.Tensor, target_size: int) -> torch.Tensor: def run_benchmark(args) -> Dict[str, Any]: backend = args.backend.lower() - + device = args.device.lower() # Check device compatibility - if device == 'cuda' and backend not in ['tv', 'tv-compiled']: - raise RuntimeError(f"CUDA device not supported for {backend} backend. Only 'tv' and 'tv-compiled' support CUDA.") - - if device == 'cuda' and not torch.cuda.is_available(): + if device == "cuda" and backend not in ["tv", "tv-compiled"]: + raise RuntimeError( + f"CUDA device not supported for {backend} backend. Only 'tv' and 'tv-compiled' support CUDA." + ) + + if device == "cuda" and not torch.cuda.is_available(): raise RuntimeError("CUDA not available. 
Install cuda-enabled torch and torchvision, or use 'cpu' device.") if backend == "opencv" and not HAS_OPENCV: @@ -161,9 +164,9 @@ def generate_test_images(): memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format if memory_format == torch.channels_last: images = images.to(memory_format=torch.channels_last) - + # Move to device for torchvision backends - if backend in ['tv', 'tv-compiled']: + if backend in ["tv", "tv-compiled"]: images = images.to(device) if args.batch_size == 1: @@ -203,8 +206,6 @@ def generate_test_images(): return {"backend": args.backend, "stats": stats} - - def main(): parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") @@ -233,7 +234,7 @@ def main(): "--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to benchmark" ) parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") - parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu, cuda, or gpu (default: cpu)") + parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu or cuda (default: cpu)") args = parser.parse_args() diff --git a/benchmarks/utils.py b/benchmarks/utils.py index 0fd479d7935..cc157065840 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -10,18 +10,21 @@ try: import cv2 + HAS_OPENCV = True except ImportError: HAS_OPENCV = False try: import albumentations as A + HAS_ALBUMENTATIONS = True except ImportError: HAS_ALBUMENTATIONS = False try: import kornia as K + HAS_KORNIA = True except ImportError: HAS_KORNIA = False @@ -108,17 +111,9 @@ def print_comparison_table(results: List[Dict[str, Any]]) -> None: def print_benchmark_info(args): """Print benchmark configuration and library versions.""" device = args.device.lower() - if device in ['gpu', 'cuda']: - device = 'cuda' - else: - device = 'cpu' - - memory_format = 'channels_last' if args.contiguity == 'CL' else 'channels_first' - - print("=" * 80) - print("BENCHMARK CONFIGURATION") - print("=" * 80) - + + memory_format = "channels_last" if args.contiguity == "CL" else "channels_first" + # Collect configuration info config = [ ["Device", device], @@ -128,41 +123,18 @@ def print_benchmark_info(args): ["Experiments", f"{args.num_exp} (+ {args.warmup} warmup)"], ["Input → output size", f"{args.min_size}-{args.max_size} → {args.target_size}×{args.target_size}"], ] - + print(tabulate(config, headers=["Parameter", "Value"], tablefmt="simple")) print() - - print("=" * 80) - print("LIBRARY VERSIONS") - print("=" * 80) - + # Collect library versions versions = [ ["PyTorch", torch.__version__], ["TorchVision", torchvision.__version__], + ["OpenCV", cv2.__version__ if HAS_OPENCV else "Not available"], + ["PIL/Pillow", getattr(Image, '__version__', "Version unavailable")], + ["Albumentations", A.__version__ if HAS_ALBUMENTATIONS else "Not available"], + ["Kornia", K.__version__ if HAS_KORNIA else "Not available"], ] - - if HAS_OPENCV: - versions.append(["OpenCV", cv2.__version__]) - else: - versions.append(["OpenCV", "Not available"]) - - # PIL version - try: - versions.append(["PIL/Pillow", Image.__version__]) - except AttributeError: - versions.append(["PIL/Pillow", "Version unavailable"]) - - if HAS_ALBUMENTATIONS: - versions.append(["Albumentations", A.__version__]) - else: - versions.append(["Albumentations", "Not available"]) - - if HAS_KORNIA: - 
From 9478b4d57d7592c81ed7c7ca6c51dabb16c8139a Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 27 Aug 2025 10:26:40 +0100
Subject: [PATCH 14/17] Allow multiple backends, invert 'relative' column to
 show speed-up against first row

---
 benchmarks/benchmark_transforms.py | 15 +++++++++++++--
 benchmarks/utils.py                |  7 ++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmark_transforms.py b/benchmarks/benchmark_transforms.py
index 1511f917d8a..85a22406e30 100644
--- a/benchmarks/benchmark_transforms.py
+++ b/benchmarks/benchmark_transforms.py
@@ -231,7 +231,7 @@ def main():
     )
     all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations", "kornia"]
     parser.add_argument(
-        "--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to benchmark"
+        "--backends", type=str, default="all", help="Backends to benchmark (comma-separated list or 'all'). First backend is used as reference for comparison."
     )
     parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
     parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu or cuda (default: cpu)")
@@ -240,7 +240,18 @@ def main():
     args = parser.parse_args()
 
     print_benchmark_info(args)
 
-    backends_to_run = all_backends if args.backend.lower() == "all" else [args.backend]
+    # Parse backends parameter
+    if args.backends.lower() == "all":
+        backends_to_run = all_backends
+    else:
+        backends_to_run = [backend.strip().lower() for backend in args.backends.split(",")]
+        # Validate backends
+        invalid_backends = [b for b in backends_to_run if b not in all_backends]
+        if invalid_backends:
+            print(f"ERROR: Invalid backends: {', '.join(invalid_backends)}")
+            print(f"Available backends: {', '.join(all_backends)}")
+            return
+
     results = []
     for backend in backends_to_run:
diff --git a/benchmarks/utils.py b/benchmarks/utils.py
index cc157065840..772748b4842 100644
--- a/benchmarks/utils.py
+++ b/benchmarks/utils.py
@@ -86,12 +86,13 @@ def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[s
 
 
 def print_comparison_table(results: List[Dict[str, Any]]) -> None:
-    torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None)
+    # Use first backend as reference for relative comparison
+    reference_median = results[0]["stats"]["median"] if results else None
 
     table_data = []
     for result in results:
         stats = result["stats"]
-        relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A"
+        speed_up = f"{reference_median / stats['median']:.2f}x" if reference_median else "N/A"
 
         table_data.append(
             {
@@ -101,7 +102,7 @@ def print_comparison_table(results: List[Dict[str, Any]]) -> None:
                 "Mean (ms)": f"{stats['mean']:.2f}",
                 "Min (ms)": f"{stats['min']:.2f}",
                 "Max (ms)": f"{stats['max']:.2f}",
-                "Relative": relative,
+                "Speed-up": speed_up,
             }
         )
 
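
Note on the inverted column in the patch above: the old Relative value divided each backend's median by torchvision's, so lower meant faster; the new Speed-up divides the first row's median by each backend's median, so higher means faster and the first backend passed on the command line always reads 1.00x. A small worked sketch with hypothetical medians (the dict layout mirrors the results entries that run_benchmark returns):

    # Hypothetical timings in milliseconds; the first entry is the reference row.
    results = [
        {"backend": "tv", "stats": {"median": 10.0}},
        {"backend": "opencv", "stats": {"median": 4.0}},
    ]

    reference_median = results[0]["stats"]["median"]
    for r in results:
        speed_up = reference_median / r["stats"]["median"]
        print(f"{r['backend']}: {speed_up:.2f}x")  # tv: 1.00x, opencv: 2.50x
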
From 1f6d7d9e31211c735acc7bb61bbcc8ed2ed2ecf7 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 27 Aug 2025 10:28:04 +0100
Subject: [PATCH 15/17] Add tv-v1 pipeline

---
 benchmarks/benchmark_transforms.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_transforms.py b/benchmarks/benchmark_transforms.py
index 85a22406e30..3c5efadff46 100644
--- a/benchmarks/benchmark_transforms.py
+++ b/benchmarks/benchmark_transforms.py
@@ -15,6 +15,7 @@
 import warnings
 from typing import Dict, Any
 import torchvision.transforms.v2.functional as F
+import torchvision.transforms.functional as Fv1
 import numpy as np
 from utils import bench, report_stats, print_comparison_table, print_benchmark_info
 
@@ -63,6 +64,14 @@ def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor
     images = F.normalize(images, mean=NORM_MEAN, std=NORM_STD)
     return images
 
+def torchvision_v1_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor:
+    images = images.float() / 255.  # rough equivalent of to_tensor()
+    images = Fv1.resize(
+        images, size=(target_size, target_size), interpolation=Fv1.InterpolationMode.BILINEAR, antialias=True
+    )
+    images = Fv1.normalize(images, mean=NORM_MEAN, std=NORM_STD)
+    return images
+
 
 def opencv_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor:
     img = cv2.resize(image, (target_size, target_size), interpolation=cv2.INTER_LINEAR)  # no antialias in OpenCV
@@ -140,6 +149,9 @@ def run_benchmark(args) -> Dict[str, Any]:
     if backend == "tv":
         torch.set_num_threads(args.num_threads)
         pipeline = torchvision_pipeline
+    elif backend == "tv-v1":
+        torch.set_num_threads(args.num_threads)
+        pipeline = torchvision_v1_pipeline
     elif backend == "tv-compiled":
         torch.set_num_threads(args.num_threads)
         pipeline = compiled_torchvision_pipeline
@@ -229,7 +241,7 @@ def main():
         default="CF",
         help="Memory format: CL (channels_last) or CF (channels_first, i.e. contiguous)",
     )
-    all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations", "kornia"]
+    all_backends = ["tv", "tv-v1", "tv-compiled", "opencv", "pil", "albumentations", "kornia"]
     parser.add_argument(
         "--backends", type=str, default="all", help="Backends to benchmark (comma-separated list or 'all'). First backend is used as reference for comparison."
     )
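
Note on the tv-v1 pipeline added above (the dispatch branch is written as elif so it stays inside the single if/elif backend chain): the v1 and v2 variants differ not only in the module they call into but also in operation order. The v1 version converts to float (dividing by 255, mimicking to_tensor()) before resizing, while the v2 pipeline resizes the uint8 tensor first and defers the float conversion, which is typically the cheaper order when downscaling. A minimal standalone sketch of the two orderings (input shape and target size are illustrative only):

    import torch
    import torchvision.transforms.functional as Fv1
    import torchvision.transforms.v2.functional as F

    img = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)

    # v2 ordering: resize while still uint8, then convert and scale to [0, 1].
    out_v2 = F.resize(img, size=(224, 224), antialias=True)
    out_v2 = F.to_dtype(out_v2, dtype=torch.float32, scale=True)

    # v1-style ordering: convert to float first, then resize.
    out_v1 = Fv1.resize(img.float() / 255.0, size=[224, 224], antialias=True)
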
From 8882898771a45a1a040d84b403eaa4110a3e107f Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 27 Aug 2025 10:28:27 +0100
Subject: [PATCH 16/17] lint

---
 benchmarks/benchmark_transforms.py | 19 ++++++++++++-------
 benchmarks/utils.py                |  7 ++++---
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/benchmarks/benchmark_transforms.py b/benchmarks/benchmark_transforms.py
index 3c5efadff46..afa38c0fcd5 100644
--- a/benchmarks/benchmark_transforms.py
+++ b/benchmarks/benchmark_transforms.py
@@ -10,14 +10,15 @@
 """
 
 import argparse
-import torch
 import random
 import warnings
-from typing import Dict, Any
-import torchvision.transforms.v2.functional as F
-import torchvision.transforms.functional as Fv1
+from typing import Any, Dict
+
 import numpy as np
-from utils import bench, report_stats, print_comparison_table, print_benchmark_info
+import torch
+import torchvision.transforms.functional as Fv1
+import torchvision.transforms.v2.functional as F
+from utils import bench, print_benchmark_info, print_comparison_table, report_stats
 
 # Filter out the specific TF32 warning
 warnings.filterwarnings(
@@ -64,8 +65,9 @@ def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor
     images = F.normalize(images, mean=NORM_MEAN, std=NORM_STD)
     return images
 
+
 def torchvision_v1_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor:
-    images = images.float() / 255.  # rough equivalent of to_tensor()
+    images = images.float() / 255.0  # rough equivalent of to_tensor()
     images = Fv1.resize(
         images, size=(target_size, target_size), interpolation=Fv1.InterpolationMode.BILINEAR, antialias=True
     )
@@ -243,7 +245,10 @@ def main():
     )
     all_backends = ["tv", "tv-v1", "tv-compiled", "opencv", "pil", "albumentations", "kornia"]
     parser.add_argument(
-        "--backends", type=str, default="all", help="Backends to benchmark (comma-separated list or 'all'). First backend is used as reference for comparison."
+        "--backends",
+        type=str,
+        default="all",
+        help="Backends to benchmark (comma-separated list or 'all'). First backend is used as reference for comparison.",
     )
     parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
     parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu or cuda (default: cpu)")
diff --git a/benchmarks/utils.py b/benchmarks/utils.py
index 772748b4842..a7bdc768101 100644
--- a/benchmarks/utils.py
+++ b/benchmarks/utils.py
@@ -2,10 +2,11 @@
 Utility functions for benchmarking transforms.
 """
 
+from time import perf_counter_ns
+from typing import Any, Callable, Dict, List
+
 import torch
 import torchvision
-from time import perf_counter_ns
-from typing import Callable, List, Dict, Any
 from tabulate import tabulate
 
 try:
@@ -133,7 +134,7 @@ def print_benchmark_info(args):
         ["PyTorch", torch.__version__],
         ["TorchVision", torchvision.__version__],
         ["OpenCV", cv2.__version__ if HAS_OPENCV else "Not available"],
-        ["PIL/Pillow", getattr(Image, '__version__', "Version unavailable")],
+        ["PIL/Pillow", getattr(Image, "__version__", "Version unavailable")],
         ["Albumentations", A.__version__ if HAS_ALBUMENTATIONS else "Not available"],
         ["Kornia", K.__version__ if HAS_KORNIA else "Not available"],
     ]
"Version unavailable")], - ["Albumentations", A.__version__ if HAS_ALBUMENTATIONS else "Not available"], - ["Kornia", K.__version__ if HAS_KORNIA else "Not available"], - ] - - print(tabulate(versions, headers=["Library", "Version"], tablefmt="simple"))