Commit
* Alternative find_objects implementation
* Dask docs suggest bag.fold is more efficient than bag.reduction
* Merge dataframes instead of using combine
* Make find_objects output obviously a dask dataframe
* A clearer way to use dask delayed
* Try to clarify dataframe column naming convention
* Additional clarifying comment
* Default step size for slices is None
* Use Marvin's suggestion so we don't have to call compute twice on result
* Revert delayed changes
* Be consistent with slice step (use default value)
* Remove redundant import statement
* Avoid user having to call compute twice on result
* Fix delayed so we know output is a dask dataframe
* Use functools partial to pass in array dimension information to _find_objects
* Improve docstrings in _find_objects.py
* Add check for integer array dtype in find_objects
* Re-trigger CI
* Improve find_objects docstring re return value
* find_objects _merge_bounding_boxes, clarify comment
* Fix find_objects bug where chunk has no non-zero labels
* Add test for find_objects with empty chunk in array
1 parent 9b4ff96 · commit f1fc159
Showing 3 changed files with 209 additions and 0 deletions.
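As a quick orientation before the diffs, here is a minimal usage sketch (not part of the commit itself) assembled from the test fixture and expected values added below; it shows what the new find_objects call returns:

import dask.array as da
import numpy as np

import dask_image.ndmeasure

# Label image borrowed from the label_image test fixture in this commit.
label_image = np.zeros((5, 10)).astype(int)
label_image[1:3, 0:2] = 111
label_image[3, 3:-2] = 222
label_image[0:2, -3:] = 333
label_image = da.from_array(label_image, chunks=(5, 5))

boxes = dask_image.ndmeasure.find_objects(label_image)  # a dask DataFrame
result = boxes.compute()                                 # a pandas DataFrame
# Per the tests below: one row per label, one column per array dimension,
# e.g. label 111 -> slice(1, 3) in dimension 0 and slice(0, 2) in dimension 1.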
_find_objects.py
@@ -0,0 +1,75 @@
import numpy as np
import pandas as pd
from dask.delayed import Delayed
import dask.dataframe as dd
def _array_chunk_location(block_id, chunks):
    """Pixel coordinate of top left corner of the array chunk."""
    array_location = []
    for idx, chunk in zip(block_id, chunks):
        array_location.append(sum(chunk[:idx]))
    return tuple(array_location)
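For illustration only (not part of the commit), the block index is turned into a global pixel offset by summing the sizes of the chunks that precede the block along each axis:

# Illustration: a (5, 10) array chunked as (5, 5) has chunks == ((5,), (5, 5)).
# The block to the right of the origin has block_id == (0, 1) and starts at column 5.
assert _array_chunk_location((0, 0), ((5,), (5, 5))) == (0, 0)
assert _array_chunk_location((0, 1), ((5,), (5, 5))) == (0, 5)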
def _find_bounding_boxes(x, array_location):
    """An alternative to scipy.ndimage.find_objects.

    We use this alternative because scipy.ndimage.find_objects
    returns a tuple of length N, where N is the largest integer label.
    This is not ideal for distributed labels, where there might be only
    one or two objects in an image chunk labelled with very large integers.
    This alternative function returns a pandas dataframe,
    with one row per object found in the image chunk.
    """
    unique_vals = np.unique(x)
    unique_vals = unique_vals[unique_vals != 0]
    result = {}
    for val in unique_vals:
        positions = np.where(x == val)
        slices = tuple(
            slice(np.min(pos) + array_location[i], np.max(pos) + 1 + array_location[i])
            for i, pos in enumerate(positions)
        )
        result[val] = slices
    column_names = [i for i in range(x.ndim)]  # column names are: 0, 1, ... nD
    return pd.DataFrame.from_dict(result, orient='index', columns=column_names)
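A small worked example (illustration only): the array_location offset shifts each per-chunk slice into global coordinates, and the returned dataframe is indexed by label value:

# Illustration: a hypothetical chunk whose top left corner sits at global position (0, 3).
chunk = np.array([[0, 0, 5],
                  [9, 0, 5]])
df = _find_bounding_boxes(chunk, array_location=(0, 3))
assert df.loc[5, 0] == slice(0, 2) and df.loc[5, 1] == slice(5, 6)
assert df.loc[9, 0] == slice(1, 2) and df.loc[9, 1] == slice(3, 4)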
def _combine_slices(slices):
    """Return the union of all slices."""
    if len(slices) == 1:
        return slices[0]
    else:
        start = min([sl.start for sl in slices])
        stop = max([sl.stop for sl in slices])
        return slice(start, stop)
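Illustration only: the union is the smallest slice covering every input slice along one dimension.

assert _combine_slices([slice(0, 2)]) == slice(0, 2)
assert _combine_slices([slice(0, 2), slice(5, 8)]) == slice(0, 8)  # gaps are bridged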
def _merge_bounding_boxes(x, ndim):
    """Merge the bounding boxes describing objects over multiple image chunks."""
    x = x.dropna()
    data = {}
    # For each dimension in the array,
    # pick out the slice values belonging to that dimension
    # and combine the slices
    # (i.e. find the union; the slice expanded to cover all input slices).
    for i in range(ndim):
        # Array dimensions are labelled by a number followed by an underscore,
        # i.e. column labels are: 0_x, 1_x, 2_x, ... 0_y, 1_y, 2_y, ...
        # (x and y represent the pair of chunks the label slices are merged from)
        slices = [x[ii] for ii in x.index if str(ii).startswith(str(i))]
        combined_slices = _combine_slices(slices)
        data[i] = combined_slices
    result = pd.Series(data=data, index=[i for i in range(ndim)], name=x.name)
    return result
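Illustration only: after the outer merge performed in _find_objects, a label found in both input dataframes has suffixed columns (0_x, 1_x from one side, 0_y, 1_y from the other), which this function folds back into a single slice per dimension:

# Illustration: a hand-built merged row for label 7 spanning two chunks.
row = pd.Series(
    {"0_x": slice(0, 2), "1_x": slice(0, 2),   # bounding box from one chunk
     "0_y": slice(0, 1), "1_y": slice(2, 3)},  # bounding box from the other chunk
    name=7,
)
merged = _merge_bounding_boxes(row, ndim=2)
assert merged[0] == slice(0, 2) and merged[1] == slice(0, 3)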
def _find_objects(ndim, df1, df2):
    """Main utility function for find_objects."""
    meta = dd.utils.make_meta([(i, object) for i in range(ndim)])
    if isinstance(df1, Delayed):
        df1 = dd.from_delayed(df1, meta=meta)
    if isinstance(df2, Delayed):
        df2 = dd.from_delayed(df2, meta=meta)
    ddf = dd.merge(df1, df2, how="outer", left_index=True, right_index=True)
    result = ddf.apply(_merge_bounding_boxes, ndim=ndim, axis=1, meta=meta)
    return result
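Illustration only (the public dask_image.ndmeasure.find_objects wrapper lives in a changed file not shown in this section): combining the bounding boxes of two chunks, here passing dask dataframes rather than Delayed objects:

# Illustration: label 7 straddles two hypothetical chunks of a 2 x 4 array.
left = _find_bounding_boxes(np.array([[0, 7], [7, 7]]), array_location=(0, 0))
right = _find_bounding_boxes(np.array([[7, 0], [0, 0]]), array_location=(0, 2))
combined = _find_objects(
    2,
    dd.from_pandas(left, npartitions=1),
    dd.from_pandas(right, npartitions=1),
)
boxes = combined.compute()
# Label 7 covers rows 0-1 and columns 0-2 of the full array.
assert boxes.loc[7, 0] == slice(0, 2) and boxes.loc[7, 1] == slice(0, 3)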
Tests for find_objects:
@@ -0,0 +1,87 @@
from dask_image.ndmeasure._utils import _labeled_comprehension_delayed
import dask.array as da
import dask.dataframe as dd
import numpy as np
import pandas as pd
import pytest

import dask_image.ndmeasure
@pytest.fixture
def label_image():
    """Return small label image for tests.

    dask.array<array, shape=(5, 10), dtype=int64, chunksize=(5, 5), chunktype=numpy.ndarray>
    array([[  0,   0,   0,   0,   0,   0,   0, 333, 333, 333],
           [111, 111,   0,   0,   0,   0,   0, 333, 333, 333],
           [111, 111,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0, 222, 222, 222, 222, 222,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0]])
    """
    label_image = np.zeros((5, 10)).astype(int)
    label_image[1:3, 0:2] = 111
    label_image[3, 3:-2] = 222
    label_image[0:2, -3:] = 333
    label_image = da.from_array(label_image, chunks=(5, 5))
    return label_image
@pytest.fixture
def label_image_with_empty_chunk():
    """Return small label image with an empty chunk for tests.

    dask.array<array, shape=(6, 6), dtype=int64, chunksize=(3, 3), chunktype=numpy.ndarray>
    array([[  0,   0,   0,   0,   0,   0],
           [111, 111,   0,   0,   0,   0],
           [111, 111,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0],
           [  0,   0,   0, 222, 222, 222],
           [  0,   0,   0,   0,   0,   0]])
    """
    label_image = np.zeros((6, 6)).astype(int)
    label_image[1:3, 0:2] = 111
    label_image[4, 3:] = 222
    label_image = da.from_array(label_image, chunks=(3, 3))
    return label_image
def test_find_objects(label_image):
    result = dask_image.ndmeasure.find_objects(label_image)
    assert isinstance(result, dd.DataFrame)
    computed_result = result.compute()
    assert isinstance(computed_result, pd.DataFrame)
    expected = pd.DataFrame.from_dict(
        {0: {111: slice(1, 3), 222: slice(3, 4), 333: slice(0, 2)},
         1: {111: slice(0, 2), 222: slice(3, 8), 333: slice(7, 10)}}
    )
    assert computed_result.equals(expected)
def test_3d_find_objects(label_image):
    label_image = da.stack([label_image, label_image], axis=2)
    result = dask_image.ndmeasure.find_objects(label_image)
    assert isinstance(result, dd.DataFrame)
    computed_result = result.compute()
    assert isinstance(computed_result, pd.DataFrame)
    expected = pd.DataFrame.from_dict(
        {0: {111: slice(1, 3), 222: slice(3, 4), 333: slice(0, 2)},
         1: {111: slice(0, 2), 222: slice(3, 8), 333: slice(7, 10)},
         2: {111: slice(0, 2), 222: slice(0, 2), 333: slice(0, 2)}}
    )
    assert computed_result.equals(expected)
def test_find_objects_with_empty_chunks(label_image_with_empty_chunk):
    result = dask_image.ndmeasure.find_objects(label_image_with_empty_chunk)
    assert isinstance(result, dd.DataFrame)
    computed_result = result.compute()
    assert isinstance(computed_result, pd.DataFrame)
    expected = pd.DataFrame.from_dict(
        {0: {111: slice(1, 3, None), 222: slice(4, 5, None)},
         1: {111: slice(0, 2, None), 222: slice(3, 6, None)}}
    )
    assert computed_result.equals(expected)
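One side note (not from the commit): the last test writes out the slice step explicitly while the earlier tests omit it. The two spellings compare equal in Python, so all three expectations follow the "default step is None" convention mentioned in the commit message:

assert slice(1, 3) == slice(1, 3, None)  # an omitted step defaults to None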