Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Region Properties Performance Overhaul - Part 5: Perimeter and Euler Characteristic #847

Open
wants to merge 14 commits into
base: branch-25.04
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/cucim/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ exclude = '''
# codespell --toml python/cucim/pyproject.toml . -i 3 -w
skip = "build*,dist,.cache,html,_build,_deps,3rdparty/*,_static,generated,latex,.git,*.ipynb,test_data/input/LICENSE-3rdparty,jitify_testing"
# ignore-regex = ""
ignore-words-list = "ans,coo,boun,bui,gool,hart,lond,manuel,nd,paeth,unser,wronly"
ignore-words-list = "ans,coo,boun,bu,bui,gool,hart,lond,manuel,nd,paeth,unser,wronly"
quiet-level = 3

# to undo: ./test_data/input/LICENSE-3rdparty
7 changes: 4 additions & 3 deletions python/cucim/src/cucim/skimage/_shared/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,13 @@ def pdist_max_blockwise(
)

blocks_per_dim = math.ceil(num_coords / coords_per_block)
coords = coords.astype(xp.float64, copy=False)
if blocks_per_dim > 1:
# reuse the same temporary storage array for most blocks
# (last block in row and column may be smaller)
temp = xp.zeros((coords_per_block, coords_per_block), dtype=xp.float32)
if coords.dtype not in [xp.float32, xp.float64]:
coords = coords.astype(xp.float32, copy=False)
temp = xp.zeros(
(coords_per_block, coords_per_block), dtype=coords.dtype
)
if not coords.flags.c_contiguous:
coords = xp.ascontiguousarray(coords)
max_dist = 0
Expand Down
22 changes: 22 additions & 0 deletions python/cucim/src/cucim/skimage/_shared/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@
]


# For n nonzero elements cupy.nonzero returns a tuple of length ndim where
# each element is an array of size (n, ) corresponding to the coordinates on
# a specific axis.
#
# Often for regionprops purposes we would rather have a single array of
# size (n, ndim) instead of a the tuple of arrays.
#
# CuPy's `_ndarray_argwhere` (used internally by cupy.nonzero) already provides
# this but is not part of the public API. To guard against potential future
# change we provide a less efficient fallback implementation.
try:
from cupy._core._routines_indexing import _ndarray_argwhere
except ImportError:

def _ndarray_argwhere(a):
"""Stack the result of cupy.nonzero into a single array

output shape will be (num_nonzero, ndim)
"""
return cp.stack(cp.nonzero(a), axis=-1)


def _count_wrappers(func):
"""Count the number of wrappers around `func`."""
unwrapped = func
Expand Down
27 changes: 17 additions & 10 deletions python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def binary_propagation(
)


def _binary_fill_holes_non_iterative(input, output=None):
def _binary_fill_holes_non_iterative(input, structure=None, output=None):
"""Non-iterative method for hole filling.

This algorithm is based on inverting the input and then using `label` to
Expand All @@ -832,7 +832,9 @@ def _binary_fill_holes_non_iterative(input, output=None):

# assign unique labels the background and holes
inverse_binary_mask = ~binary_mask
inverse_labels, _ = _measurements.label(inverse_binary_mask)
inverse_labels, _ = _measurements.label(
inverse_binary_mask, structure=structure
)

# After inversion, what was originally the background will now be the
# first foreground label encountered. This is ensured due to the
Expand All @@ -851,6 +853,13 @@ def _binary_fill_holes_non_iterative(input, output=None):
if output is None:
output = cupy.ascontiguousarray(temp)
else:
# handle output argument as in _binary_erosion
if isinstance(output, cupy.ndarray):
if output.dtype.kind == "c":
raise TypeError("Complex output type not supported")
else:
output = bool
output = _util._get_output(output, input)
output[:] = temp
return output

Expand Down Expand Up @@ -901,15 +910,13 @@ def binary_fill_holes(
filter_all_axes = axes == tuple(range(input.ndim))
if isinstance(origin, int):
origin = (origin,) * len(axes)
if structure is None and all(o == 0 for o in origin) and filter_all_axes:
if all(o == 0 for o in origin) and filter_all_axes:
return _binary_fill_holes_non_iterative(input, output=output)
else:
if filter_all_axes:
warnings.warn(
"It is recommended to keep the default structure=None and "
"origin=0, so that a faster non-iterative algorithm can be "
"used."
)
elif filter_all_axes:
warnings.warn(
"It is recommended to keep the default origin=0 so that a faster "
"non-iterative algorithm can be used."
)
mask = cupy.logical_not(input)
tmp = cupy.zeros(mask.shape, bool)
inplace = isinstance(output, cupy.ndarray)
Expand Down
194 changes: 9 additions & 185 deletions python/cucim/src/cucim/skimage/measure/_moments_analytical.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,190 +4,9 @@
import cupy as cp
import numpy as np

_order0_or_1 = """
mc[0] = m[0];
"""

_order2_2d = """
/* Implementation of the commented code below with C-order raveled
* indices into 3 x 3 matrices, m and mc.
*
* mc[0, 0] = m[0, 0];
* cx = m[1, 0] / m[0, 0];
* cy = m[0, 1] / m[0, 0];
* mc[1, 1] = m[1, 1] - cx*m[0, 1];
* mc[2, 0] = m[2, 0] - cx*m[1, 0];
* mc[0, 2] = m[0, 2] - cy*m[0, 1];
*/
mc[0] = m[0];
F cx = m[3] / m[0];
F cy = m[1] / m[0];
mc[4] = m[4] - cx*m[1];
mc[6] = m[6] - cx*m[3];
mc[2] = m[2] - cy*m[1];
"""

_order3_2d = """
/* Implementation of the commented code below with C-order raveled
* indices into 4 x 4 matrices, m and mc.
*
* mc[0, 0] = m[0, 0];
* cx = m[1, 0] / m[0, 0];
* cy = m[0, 1] / m[0, 0];
* mc[1, 1] = m[1, 1] - cx*m[0, 1];
* mc[2, 0] = m[2, 0] - cx*m[1, 0];
* mc[0, 2] = m[0, 2] - cy*m[0, 1];
* mc[2, 1] = (m[2, 1] - 2*cx*m[1, 1] - cy*m[2, 0] + cx*cx*m[0, 1] + cy*cx*m[1, 0]);
* mc[1, 2] = (m[1, 2] - 2*cy*m[1, 1] - cx*m[0, 2] + 2*cy*cx*m[0, 1]);
* mc[3, 0] = m[3, 0] - 3*cx*m[2, 0] + 2*cx*cx*m[1, 0];
* mc[0, 3] = m[0, 3] - 3*cy*m[0, 2] + 2*cy*cx*m[0, 1];
*/

mc[0] = m[0];
F cx = m[4] / m[0];
F cy = m[1] / m[0];
// 2nd order moments
mc[5] = m[5] - cx*m[1];
mc[8] = m[8] - cx*m[4];
mc[2] = m[2] - cy*m[1];
// 3rd order moments
mc[9] = (m[9] - 2*cx*m[5] - cy*m[8] + cx*cx*m[1] + cy*cx*m[4]);
mc[6] = (m[6] - 2*cy*m[5] - cx*m[2] + 2*cy*cx*m[1]);
mc[12] = m[12] - 3*cx*m[8] + 2*cx*cx*m[4];
mc[3] = m[3] - 3*cy*m[2] + 2*cy*cy*m[1];
""" # noqa


# Note for 2D kernels using C-order raveled indices
_order2_3d = """
/* Implementation of the commented code below with C-order raveled
* indices into shape (3, 3, 3) matrices, m and mc.
*
* mc[0, 0, 0] = m[0, 0, 0];
* cx = m[1, 0, 0] / m[0, 0, 0];
* cy = m[0, 1, 0] / m[0, 0, 0];
* cz = m[0, 0, 1] / m[0, 0, 0];
* mc[0, 0, 2] = -cz*m[0, 0, 1] + m[0, 0, 2];
* mc[0, 1, 1] = -cy*m[0, 0, 1] + m[0, 1, 1];
* mc[0, 2, 0] = -cy*m[0, 1, 0] + m[0, 2, 0];
* mc[1, 0, 1] = -cx*m[0, 0, 1] + m[1, 0, 1];
* mc[1, 1, 0] = -cx*m[0, 1, 0] + m[1, 1, 0];
* mc[2, 0, 0] = -cx*m[1, 0, 0] + m[2, 0, 0];
*/
mc[0] = m[0];
F cx = m[9] / m[0];
F cy = m[3] / m[0];
F cz = m[1] / m[0];
// 2nd order moments
mc[2] = -cz*m[1] + m[2];
mc[4] = -cy*m[1] + m[4];
mc[6] = -cy*m[3] + m[6];
mc[10] = -cx*m[1] + m[10];
mc[12] = -cx*m[3] + m[12];
mc[18] = -cx*m[9] + m[18];
"""

_order3_3d = """
/* Implementation of the commented code below with C-order raveled
* indices into shape (4, 4, 4) matrices, m and mc.
*
* mc[0, 0, 0] = m[0, 0, 0];
* cx = m[1, 0, 0] / m[0, 0, 0];
* cy = m[0, 1, 0] / m[0, 0, 0];
* cz = m[0, 0, 1] / m[0, 0, 0];
* // 2nd order moments
* mc[0, 0, 2] = -cz*m[0, 0, 1] + m[0, 0, 2];
* mc[0, 1, 1] = -cy*m[0, 0, 1] + m[0, 1, 1];
* mc[0, 2, 0] = -cy*m[0, 1, 0] + m[0, 2, 0];
* mc[1, 0, 1] = -cx*m[0, 0, 1] + m[1, 0, 1];
* mc[1, 1, 0] = -cx*m[0, 1, 0] + m[1, 1, 0];
* mc[2, 0, 0] = -cx*m[1, 0, 0] + m[2, 0, 0];
* // 3rd order moments
* mc[0, 0, 3] = (2*cz*cz*m[0, 0, 1] - 3*cz*m[0, 0, 2] + m[0, 0, 3]);
* mc[0, 1, 2] = (-cy*m[0, 0, 2] + 2*cz*(cy*m[0, 0, 1] - m[0, 1, 1]) + m[0, 1, 2]);
* mc[0, 2, 1] = (cy*cy*m[0, 0, 1] - 2*cy*m[0, 1, 1] + cz*(cy*m[0, 1, 0] - m[0, 2, 0]) + m[0, 2, 1]);
* mc[0, 3, 0] = (2*cy*cy*m[0, 1, 0] - 3*cy*m[0, 2, 0] + m[0, 3, 0]);
* mc[1, 0, 2] = (-cx*m[0, 0, 2] + 2*cz*(cx*m[0, 0, 1] - m[1, 0, 1]) + m[1, 0, 2]);
* mc[1, 1, 1] = (-cx*m[0, 1, 1] + cy*(cx*m[0, 0, 1] - m[1, 0, 1]) + cz*(cx*m[0, 1, 0] - m[1, 1, 0]) + m[1, 1, 1]);
* mc[1, 2, 0] = (-cx*m[0, 2, 0] - 2*cy*(-cx*m[0, 1, 0] + m[1, 1, 0]) + m[1, 2, 0]);
* mc[2, 0, 1] = (cx*cx*m[0, 0, 1] - 2*cx*m[1, 0, 1] + cz*(cx*m[1, 0, 0] - m[2, 0, 0]) + m[2, 0, 1]);
* mc[2, 1, 0] = (cx*cx*m[0, 1, 0] - 2*cx*m[1, 1, 0] + cy*(cx*m[1, 0, 0] - m[2, 0, 0]) + m[2, 1, 0]);
* mc[3, 0, 0] = (2*cx*cx*m[1, 0, 0] - 3*cx*m[2, 0, 0] + m[3, 0, 0]);
*/
mc[0] = m[0];
F cx = m[16] / m[0];
F cy = m[4] / m[0];
F cz = m[1] / m[0];
// 2nd order moments
mc[2] = -cz*m[1] + m[2];
mc[5] = -cy*m[1] + m[5];
mc[8] = -cy*m[4] + m[8];
mc[17] = -cx*m[1] + m[17];
mc[20] = -cx*m[4] + m[20];
mc[32] = -cx*m[16] + m[32];
// 3rd order moments
mc[3] = (2*cz*cz*m[1] - 3*cz*m[2] + m[3]);
mc[6] = (-cy*m[2] + 2*cz*(cy*m[1] - m[5]) + m[6]);
mc[9] = (cy*cy*m[1] - 2*cy*m[5] + cz*(cy*m[4] - m[8]) + m[9]);
mc[12] = (2*cy*cy*m[4] - 3*cy*m[8] + m[12]);
mc[18] = (-cx*m[2] + 2*cz*(cx*m[1] - m[17]) + m[18]);
mc[21] = (-cx*m[5] + cy*(cx*m[1] - m[17]) + cz*(cx*m[4] - m[20]) + m[21]);
mc[24] = (-cx*m[8] - 2*cy*(-cx*m[4] + m[20]) + m[24]);
mc[33] = (cx*cx*m[1] - 2*cx*m[17] + cz*(cx*m[16] - m[32]) + m[33]);
mc[36] = (cx*cx*m[4] - 2*cx*m[20] + cy*(cx*m[16] - m[32]) + m[36]);
mc[48] = (2*cx*cx*m[16] - 3*cx*m[32] + m[48]);
""" # noqa


def _moments_raw_to_central_fast(moments_raw):
"""Analytical formulae for 2D and 3D central moments of order < 4.

`moments_raw_to_central` will automatically call this function when
ndim < 4 and order < 4.

Parameters
----------
moments_raw : ndarray
The raw moments.

Returns
-------
moments_central : ndarray
The central moments.
"""
ndim = moments_raw.ndim
order = moments_raw.shape[0] - 1
# convert to float64 during the computation for better accuracy
moments_raw = moments_raw.astype(cp.float64, copy=False)
moments_central = cp.zeros_like(moments_raw)
if order >= 4 or ndim not in [2, 3]:
raise ValueError(
"This function only supports 2D or 3D moments of order < 4."
)
if ndim == 2:
if order < 2:
operation = _order0_or_1
elif order == 2:
operation = _order2_2d
elif order == 3:
operation = _order3_2d
elif ndim == 3:
if order < 2:
operation = _order0_or_1
elif order == 2:
operation = _order2_3d
elif order == 3:
operation = _order3_3d

kernel = cp.ElementwiseKernel(
"raw F m",
"raw F mc",
operation=operation,
name=f"order{order}_{ndim}d_kernel",
)
# run a single-threaded kernel, so we can avoid device->host->device copy
kernel(moments_raw, moments_central, size=1)
return moments_central
from ._regionprops_gpu_moments_kernels import (
regionprops_moments_central,
)


def moments_raw_to_central(moments_raw):
Expand All @@ -196,7 +15,12 @@ def moments_raw_to_central(moments_raw):
if ndim in [2, 3] and order < 4:
# fast path with analytical GPU kernels
# (avoids any host/device transfers)
moments_central = _moments_raw_to_central_fast(moments_raw)

# have to temporarily prepend a "labels" dimension to reuse
# regionprops_moments_central
moments_central = regionprops_moments_central(
moments_raw[cp.newaxis, ...], ndim
)[0]
return moments_central.astype(moments_raw.dtype, copy=False)

# Fallback to general formula applied on the host
Expand Down
51 changes: 3 additions & 48 deletions python/cucim/src/cucim/skimage/measure/_regionprops.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,51 +258,6 @@ def func2d(self, *args, **kwargs):
return func2d


def _inertia_eigvals_to_axes_lengths_3D(inertia_tensor_eigvals):
"""Compute ellipsoid axis lengths from inertia tensor eigenvalues.

Parameters
---------
inertia_tensor_eigvals : sequence of float
A sequence of 3 floating point eigenvalues, sorted in descending order.

Returns
-------
axis_lengths : list of float
The ellipsoid axis lengths sorted in descending order.

Notes
-----
Let a >= b >= c be the ellipsoid semi-axes and s1 >= s2 >= s3 be the
inertia tensor eigenvalues.

The inertia tensor eigenvalues are given for a solid ellipsoid in [1]_.
s1 = 1 / 5 * (a**2 + b**2)
s2 = 1 / 5 * (a**2 + c**2)
s3 = 1 / 5 * (b**2 + c**2)

Rearranging to solve for a, b, c in terms of s1, s2, s3 gives
a = math.sqrt(5 / 2 * ( s1 + s2 - s3))
b = math.sqrt(5 / 2 * ( s1 - s2 + s3))
c = math.sqrt(5 / 2 * (-s1 + s2 + s3))

We can then simply replace sqrt(5/2) by sqrt(10) to get the full axes
lengths rather than the semi-axes lengths.

References
----------
..[1] https://en.wikipedia.org/wiki/List_of_moments_of_inertia#List_of_3D_inertia_tensors
""" # noqa: E501
axis_lengths = []
for ax in range(2, -1, -1):
w = sum(
v * -1 if i == ax else v
for i, v in enumerate(inertia_tensor_eigvals)
)
axis_lengths.append(math.sqrt(10 * w))
return axis_lengths


class RegionProperties:
"""Please refer to `skimage.measure.regionprops` for more information
on the available region properties.
Expand Down Expand Up @@ -611,7 +566,6 @@ def axis_major_length(self):
l1 = self.inertia_tensor_eigvals[0]
return 4 * math.sqrt(l1)
elif self._ndim == 3:
# equivalent to _inertia_eigvals_to_axes_lengths_3D(ev)[0]
ev = self.inertia_tensor_eigvals
return math.sqrt(10 * (ev[0] + ev[1] - ev[2]))
else:
Expand All @@ -623,9 +577,10 @@ def axis_minor_length(self):
l2 = self.inertia_tensor_eigvals[-1]
return 4 * math.sqrt(l2)
elif self._ndim == 3:
# equivalent to _inertia_eigvals_to_axes_lengths_3D(ev)[-1]
ev = self.inertia_tensor_eigvals
return math.sqrt(10 * (-ev[0] + ev[1] + ev[2]))
# use max to avoid possibly very small negative value due to
# numeric error
return math.sqrt(10 * max(-ev[0] + ev[1] + ev[2], 0.0))
else:
raise ValueError("axis_minor_length only available in 2D and 3D")

Expand Down
Loading