rapidsai · grlee77 · Mar 1, 2025 · Mar 1, 2025 · Mar 1, 2025 · Mar 1, 2025
@@ -225,7 +225,7 @@ exclude = '''
 #   codespell --toml python/cucim/pyproject.toml . -i 3 -w
 skip = "build*,dist,.cache,html,_build,_deps,3rdparty/*,_static,generated,latex,.git,*.ipynb,test_data/input/LICENSE-3rdparty,jitify_testing"
 # ignore-regex = ""
-ignore-words-list = "ans,coo,boun,bui,gool,hart,lond,manuel,nd,paeth,unser,wronly"
+ignore-words-list = "ans,coo,boun,bu,bui,gool,hart,lond,manuel,nd,paeth,unser,wronly"
 quiet-level = 3
 
 # to undo: ./test_data/input/LICENSE-3rdparty
@@ -128,12 +128,13 @@ def pdist_max_blockwise(
         )
 
     blocks_per_dim = math.ceil(num_coords / coords_per_block)
+    coords = coords.astype(xp.float64, copy=False)
     if blocks_per_dim > 1:
         # reuse the same temporary storage array for most blocks
         # (last block in row and column may be smaller)
-        temp = xp.zeros((coords_per_block, coords_per_block), dtype=xp.float32)
-    if coords.dtype not in [xp.float32, xp.float64]:
-        coords = coords.astype(xp.float32, copy=False)
+        temp = xp.zeros(
+            (coords_per_block, coords_per_block), dtype=coords.dtype
+        )
     if not coords.flags.c_contiguous:
         coords = xp.ascontiguousarray(coords)
     max_dist = 0

@@ -27,6 +27,28 @@
 ]
 
 
+# For n nonzero elements cupy.nonzero returns a tuple of length ndim where
+# each element is an array of size (n, ) corresponding to the coordinates on
+# a specific axis.
+#
+# Often for regionprops purposes we would rather have a single array of
+# size (n, ndim) instead of a the tuple of arrays.
+#
+# CuPy's `_ndarray_argwhere` (used internally by cupy.nonzero) already provides
+# this but is not part of the public API. To guard against potential future
+# change we provide a less efficient fallback implementation.
+try:
+    from cupy._core._routines_indexing import _ndarray_argwhere
+except ImportError:
+
+    def _ndarray_argwhere(a):
+        """Stack the result of cupy.nonzero into a single array
+
+        output shape will be (num_nonzero, ndim)
+        """
+        return cp.stack(cp.nonzero(a), axis=-1)
+
+
 def _count_wrappers(func):
     """Count the number of wrappers around `func`."""
     unwrapped = func

@@ -808,7 +808,7 @@ def binary_propagation(
     )
 
 
-def _binary_fill_holes_non_iterative(input, output=None):
+def _binary_fill_holes_non_iterative(input, structure=None, output=None):
     """Non-iterative method for hole filling.
 
     This algorithm is based on inverting the input and then using `label` to
@@ -832,7 +832,9 @@ def _binary_fill_holes_non_iterative(input, output=None):
 
     # assign unique labels the background and holes
     inverse_binary_mask = ~binary_mask
-    inverse_labels, _ = _measurements.label(inverse_binary_mask)
+    inverse_labels, _ = _measurements.label(
+        inverse_binary_mask, structure=structure
+    )
 
     # After inversion, what was originally the background will now be the
     # first foreground label encountered. This is ensured due to the
@@ -851,6 +853,13 @@ def _binary_fill_holes_non_iterative(input, output=None):
     if output is None:
         output = cupy.ascontiguousarray(temp)
     else:
+        # handle output argument as in _binary_erosion
+        if isinstance(output, cupy.ndarray):
+            if output.dtype.kind == "c":
+                raise TypeError("Complex output type not supported")
+        else:
+            output = bool
+        output = _util._get_output(output, input)
         output[:] = temp
     return output
 
@@ -901,15 +910,13 @@ def binary_fill_holes(
     filter_all_axes = axes == tuple(range(input.ndim))
     if isinstance(origin, int):
         origin = (origin,) * len(axes)
-    if structure is None and all(o == 0 for o in origin) and filter_all_axes:
+    if all(o == 0 for o in origin) and filter_all_axes:
         return _binary_fill_holes_non_iterative(input, output=output)
-    else:
-        if filter_all_axes:
-            warnings.warn(
-                "It is recommended to keep the default structure=None and "
-                "origin=0, so that a faster non-iterative algorithm can be "
-                "used."
-            )
+    elif filter_all_axes:
+        warnings.warn(
+            "It is recommended to keep the default origin=0 so that a faster "
+            "non-iterative algorithm can be used."
+        )
     mask = cupy.logical_not(input)
     tmp = cupy.zeros(mask.shape, bool)
     inplace = isinstance(output, cupy.ndarray)

@@ -4,190 +4,9 @@
 import cupy as cp
 import numpy as np
 
-_order0_or_1 = """
-    mc[0] = m[0];
-"""
-
-_order2_2d = """
-    /* Implementation of the commented code below with C-order raveled
-     * indices into 3 x 3 matrices, m and mc.
-     *
-     * mc[0, 0] = m[0, 0];
-     * cx = m[1, 0] / m[0, 0];
-     * cy = m[0, 1] / m[0, 0];
-     * mc[1, 1] = m[1, 1] - cx*m[0, 1];
-     * mc[2, 0] = m[2, 0] - cx*m[1, 0];
-     * mc[0, 2] = m[0, 2] - cy*m[0, 1];
-     */
-    mc[0] = m[0];
-    F cx = m[3] / m[0];
-    F cy = m[1] / m[0];
-    mc[4] = m[4] - cx*m[1];
-    mc[6] = m[6] - cx*m[3];
-    mc[2] = m[2] - cy*m[1];
-"""
-
-_order3_2d = """
-    /* Implementation of the commented code below with C-order raveled
-     * indices into 4 x 4 matrices, m and mc.
-     *
-     * mc[0, 0] = m[0, 0];
-     * cx = m[1, 0] / m[0, 0];
-     * cy = m[0, 1] / m[0, 0];
-     * mc[1, 1] = m[1, 1] - cx*m[0, 1];
-     * mc[2, 0] = m[2, 0] - cx*m[1, 0];
-     * mc[0, 2] = m[0, 2] - cy*m[0, 1];
-     * mc[2, 1] = (m[2, 1] - 2*cx*m[1, 1] - cy*m[2, 0] + cx*cx*m[0, 1] + cy*cx*m[1, 0]);
-     * mc[1, 2] = (m[1, 2] - 2*cy*m[1, 1] - cx*m[0, 2] + 2*cy*cx*m[0, 1]);
-     * mc[3, 0] = m[3, 0] - 3*cx*m[2, 0] + 2*cx*cx*m[1, 0];
-     * mc[0, 3] = m[0, 3] - 3*cy*m[0, 2] + 2*cy*cx*m[0, 1];
-     */
-
-    mc[0] = m[0];
-    F cx = m[4] / m[0];
-    F cy = m[1] / m[0];
-    // 2nd order moments
-    mc[5] = m[5] - cx*m[1];
-    mc[8] = m[8] - cx*m[4];
-    mc[2] = m[2] - cy*m[1];
-    // 3rd order moments
-    mc[9] = (m[9] - 2*cx*m[5] - cy*m[8] + cx*cx*m[1] + cy*cx*m[4]);
-    mc[6] = (m[6] - 2*cy*m[5] - cx*m[2] + 2*cy*cx*m[1]);
-    mc[12] = m[12] - 3*cx*m[8] + 2*cx*cx*m[4];
-    mc[3] = m[3] - 3*cy*m[2] + 2*cy*cy*m[1];
-"""  # noqa
-
-
-# Note for 2D kernels using C-order raveled indices
-_order2_3d = """
-    /* Implementation of the commented code below with C-order raveled
-     * indices into shape (3, 3, 3) matrices, m and mc.
-     *
-     * mc[0, 0, 0] = m[0, 0, 0];
-     * cx = m[1, 0, 0] / m[0, 0, 0];
-     * cy = m[0, 1, 0] / m[0, 0, 0];
-     * cz = m[0, 0, 1] / m[0, 0, 0];
-     * mc[0, 0, 2] = -cz*m[0, 0, 1] + m[0, 0, 2];
-     * mc[0, 1, 1] = -cy*m[0, 0, 1] + m[0, 1, 1];
-     * mc[0, 2, 0] = -cy*m[0, 1, 0] + m[0, 2, 0];
-     * mc[1, 0, 1] = -cx*m[0, 0, 1] + m[1, 0, 1];
-     * mc[1, 1, 0] = -cx*m[0, 1, 0] + m[1, 1, 0];
-     * mc[2, 0, 0] = -cx*m[1, 0, 0] + m[2, 0, 0];
-     */
-    mc[0] = m[0];
-    F cx = m[9] / m[0];
-    F cy = m[3] / m[0];
-    F cz = m[1] / m[0];
-    // 2nd order moments
-    mc[2] = -cz*m[1] + m[2];
-    mc[4] = -cy*m[1] + m[4];
-    mc[6] = -cy*m[3] + m[6];
-    mc[10] = -cx*m[1] + m[10];
-    mc[12] = -cx*m[3] + m[12];
-    mc[18] = -cx*m[9] + m[18];
-"""
-
-_order3_3d = """
-    /* Implementation of the commented code below with C-order raveled
-     * indices into shape (4, 4, 4) matrices, m and mc.
-     *
-     * mc[0, 0, 0] = m[0, 0, 0];
-     * cx = m[1, 0, 0] / m[0, 0, 0];
-     * cy = m[0, 1, 0] / m[0, 0, 0];
-     * cz = m[0, 0, 1] / m[0, 0, 0];
-     * // 2nd order moments
-     * mc[0, 0, 2] = -cz*m[0, 0, 1] + m[0, 0, 2];
-     * mc[0, 1, 1] = -cy*m[0, 0, 1] + m[0, 1, 1];
-     * mc[0, 2, 0] = -cy*m[0, 1, 0] + m[0, 2, 0];
-     * mc[1, 0, 1] = -cx*m[0, 0, 1] + m[1, 0, 1];
-     * mc[1, 1, 0] = -cx*m[0, 1, 0] + m[1, 1, 0];
-     * mc[2, 0, 0] = -cx*m[1, 0, 0] + m[2, 0, 0];
-     * // 3rd order moments
-     * mc[0, 0, 3] = (2*cz*cz*m[0, 0, 1] - 3*cz*m[0, 0, 2] + m[0, 0, 3]);
-     * mc[0, 1, 2] = (-cy*m[0, 0, 2] + 2*cz*(cy*m[0, 0, 1] - m[0, 1, 1]) + m[0, 1, 2]);
-     * mc[0, 2, 1] = (cy*cy*m[0, 0, 1] - 2*cy*m[0, 1, 1] + cz*(cy*m[0, 1, 0] - m[0, 2, 0]) + m[0, 2, 1]);
-     * mc[0, 3, 0] = (2*cy*cy*m[0, 1, 0] - 3*cy*m[0, 2, 0] + m[0, 3, 0]);
-     * mc[1, 0, 2] = (-cx*m[0, 0, 2] + 2*cz*(cx*m[0, 0, 1] - m[1, 0, 1]) + m[1, 0, 2]);
-     * mc[1, 1, 1] = (-cx*m[0, 1, 1] + cy*(cx*m[0, 0, 1] - m[1, 0, 1]) + cz*(cx*m[0, 1, 0] - m[1, 1, 0]) + m[1, 1, 1]);
-     * mc[1, 2, 0] = (-cx*m[0, 2, 0] - 2*cy*(-cx*m[0, 1, 0] + m[1, 1, 0]) + m[1, 2, 0]);
-     * mc[2, 0, 1] = (cx*cx*m[0, 0, 1] - 2*cx*m[1, 0, 1] + cz*(cx*m[1, 0, 0] - m[2, 0, 0]) + m[2, 0, 1]);
-     * mc[2, 1, 0] = (cx*cx*m[0, 1, 0] - 2*cx*m[1, 1, 0] + cy*(cx*m[1, 0, 0] - m[2, 0, 0]) + m[2, 1, 0]);
-     * mc[3, 0, 0] = (2*cx*cx*m[1, 0, 0] - 3*cx*m[2, 0, 0] + m[3, 0, 0]);
-     */
-    mc[0] = m[0];
-    F cx = m[16] / m[0];
-    F cy = m[4] / m[0];
-    F cz = m[1] / m[0];
-    // 2nd order moments
-    mc[2] = -cz*m[1] + m[2];
-    mc[5] = -cy*m[1] + m[5];
-    mc[8] = -cy*m[4] + m[8];
-    mc[17] = -cx*m[1] + m[17];
-    mc[20] = -cx*m[4] + m[20];
-    mc[32] = -cx*m[16] + m[32];
-    // 3rd order moments
-    mc[3] = (2*cz*cz*m[1] - 3*cz*m[2] + m[3]);
-    mc[6] = (-cy*m[2] + 2*cz*(cy*m[1] - m[5]) + m[6]);
-    mc[9] = (cy*cy*m[1] - 2*cy*m[5] + cz*(cy*m[4] - m[8]) + m[9]);
-    mc[12] = (2*cy*cy*m[4] - 3*cy*m[8] + m[12]);
-    mc[18] = (-cx*m[2] + 2*cz*(cx*m[1] - m[17]) + m[18]);
-    mc[21] = (-cx*m[5] + cy*(cx*m[1] - m[17]) + cz*(cx*m[4] - m[20]) + m[21]);
-    mc[24] = (-cx*m[8] - 2*cy*(-cx*m[4] + m[20]) + m[24]);
-    mc[33] = (cx*cx*m[1] - 2*cx*m[17] + cz*(cx*m[16] - m[32]) + m[33]);
-    mc[36] = (cx*cx*m[4] - 2*cx*m[20] + cy*(cx*m[16] - m[32]) + m[36]);
-    mc[48] = (2*cx*cx*m[16] - 3*cx*m[32] + m[48]);
-"""  # noqa
-
-
-def _moments_raw_to_central_fast(moments_raw):
-    """Analytical formulae for 2D and 3D central moments of order < 4.
-
-    `moments_raw_to_central` will automatically call this function when
-    ndim < 4 and order < 4.
-
-    Parameters
-    ----------
-    moments_raw : ndarray
-        The raw moments.
-
-    Returns
-    -------
-    moments_central : ndarray
-        The central moments.
-    """
-    ndim = moments_raw.ndim
-    order = moments_raw.shape[0] - 1
-    # convert to float64 during the computation for better accuracy
-    moments_raw = moments_raw.astype(cp.float64, copy=False)
-    moments_central = cp.zeros_like(moments_raw)
-    if order >= 4 or ndim not in [2, 3]:
-        raise ValueError(
-            "This function only supports 2D or 3D moments of order < 4."
-        )
-    if ndim == 2:
-        if order < 2:
-            operation = _order0_or_1
-        elif order == 2:
-            operation = _order2_2d
-        elif order == 3:
-            operation = _order3_2d
-    elif ndim == 3:
-        if order < 2:
-            operation = _order0_or_1
-        elif order == 2:
-            operation = _order2_3d
-        elif order == 3:
-            operation = _order3_3d
-
-    kernel = cp.ElementwiseKernel(
-        "raw F m",
-        "raw F mc",
-        operation=operation,
-        name=f"order{order}_{ndim}d_kernel",
-    )
-    # run a single-threaded kernel, so we can avoid device->host->device copy
-    kernel(moments_raw, moments_central, size=1)
-    return moments_central
+from ._regionprops_gpu_moments_kernels import (
+    regionprops_moments_central,
+)
 
 
 def moments_raw_to_central(moments_raw):
@@ -196,7 +15,12 @@ def moments_raw_to_central(moments_raw):
     if ndim in [2, 3] and order < 4:
         # fast path with analytical GPU kernels
         # (avoids any host/device transfers)
-        moments_central = _moments_raw_to_central_fast(moments_raw)
+
+        # have to temporarily prepend a "labels" dimension to reuse
+        # regionprops_moments_central
+        moments_central = regionprops_moments_central(
+            moments_raw[cp.newaxis, ...], ndim
+        )[0]
         return moments_central.astype(moments_raw.dtype, copy=False)
 
     # Fallback to general formula applied on the host

@@ -258,51 +258,6 @@ def func2d(self, *args, **kwargs):
     return func2d
 
 
-def _inertia_eigvals_to_axes_lengths_3D(inertia_tensor_eigvals):
-    """Compute ellipsoid axis lengths from inertia tensor eigenvalues.
-
-    Parameters
-    ---------
-    inertia_tensor_eigvals : sequence of float
-        A sequence of 3 floating point eigenvalues, sorted in descending order.
-
-    Returns
-    -------
-    axis_lengths : list of float
-        The ellipsoid axis lengths sorted in descending order.
-
-    Notes
-    -----
-    Let a >= b >= c be the ellipsoid semi-axes and s1 >= s2 >= s3 be the
-    inertia tensor eigenvalues.
-
-    The inertia tensor eigenvalues are given for a solid ellipsoid in [1]_.
-    s1 = 1 / 5 * (a**2 + b**2)
-    s2 = 1 / 5 * (a**2 + c**2)
-    s3 = 1 / 5 * (b**2 + c**2)
-
-    Rearranging to solve for a, b, c in terms of s1, s2, s3 gives
-    a = math.sqrt(5 / 2 * ( s1 + s2 - s3))
-    b = math.sqrt(5 / 2 * ( s1 - s2 + s3))
-    c = math.sqrt(5 / 2 * (-s1 + s2 + s3))
-
-    We can then simply replace sqrt(5/2) by sqrt(10) to get the full axes
-    lengths rather than the semi-axes lengths.
-
-    References
-    ----------
-    ..[1] https://en.wikipedia.org/wiki/List_of_moments_of_inertia#List_of_3D_inertia_tensors
-    """  # noqa: E501
-    axis_lengths = []
-    for ax in range(2, -1, -1):
-        w = sum(
-            v * -1 if i == ax else v
-            for i, v in enumerate(inertia_tensor_eigvals)
-        )
-        axis_lengths.append(math.sqrt(10 * w))
-    return axis_lengths
-
-
 class RegionProperties:
     """Please refer to `skimage.measure.regionprops` for more information
     on the available region properties.
@@ -611,7 +566,6 @@ def axis_major_length(self):
             l1 = self.inertia_tensor_eigvals[0]
             return 4 * math.sqrt(l1)
         elif self._ndim == 3:
-            # equivalent to _inertia_eigvals_to_axes_lengths_3D(ev)[0]
             ev = self.inertia_tensor_eigvals
             return math.sqrt(10 * (ev[0] + ev[1] - ev[2]))
         else:
@@ -623,9 +577,10 @@ def axis_minor_length(self):
             l2 = self.inertia_tensor_eigvals[-1]
             return 4 * math.sqrt(l2)
         elif self._ndim == 3:
-            # equivalent to _inertia_eigvals_to_axes_lengths_3D(ev)[-1]
             ev = self.inertia_tensor_eigvals
-            return math.sqrt(10 * (-ev[0] + ev[1] + ev[2]))
+            # use max to avoid possibly very small negative value due to
+            # numeric error
+            return math.sqrt(10 * max(-ev[0] + ev[1] + ev[2], 0.0))
         else:
             raise ValueError("axis_minor_length only available in 2D and 3D")