Skip to content

Commit 4d29377

Browse files
committed
feat: add label computation and storing before configuration generation
1 parent 0cf3ffe commit 4d29377

File tree

3 files changed

+59
-8
lines changed

3 files changed

+59
-8
lines changed

ingestion_tools/dataset_configs/template.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,6 @@ annotations: OPTIONAL
100100
is_visualization_default: see InstanceSegmentation.is_visualization_default
101101
is_portal_standard: OPTIONAL, BOOLEAN (DEFAULT FALSE)
102102
scale_factor: OPTIONAL, FLOAT (DEFAULT 1) (POSITIVE)
103-
mask_label: OPTIONAL, INTEGER (DEFAULT 1)
104103
parent_filters: see InstanceSegmentation.parent_filters
105104
exclude: SEE InstanceSegmentation.exclude
106105
- SegmentationMask:

ingestion_tools/scripts/common/image.py

Lines changed: 52 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import json
22
import os
33
import os.path
4-
import time
54
from abc import ABC, abstractmethod
65
from dataclasses import dataclass
76
from datetime import datetime
@@ -54,12 +53,24 @@ class ZarrReader:
5453
def __init__(self, fs, zarrdir):
5554
self.fs = fs
5655
self.zarrdir = zarrdir
56+
self._loc = None
5757

5858
def get_data(self):
59-
loc = ome_zarr.io.ZarrLocation(self.fs.destformat(self.zarrdir))
59+
loc = self._load_zarr_loc()
6060
data = loc.load("0")
6161
return data
6262

63+
@property
64+
def attrs(self):
65+
loc = self._load_zarr_loc()
66+
group = zarr.group(loc.store)
67+
return group.attrs
68+
69+
def _load_zarr_loc(self):
70+
if self._loc is None:
71+
self._loc = ome_zarr.io.ZarrLocation(self.fs.destformat(self.zarrdir))
72+
return self._loc
73+
6374

6475
class ZarrWriter:
6576
def __init__(self, fs: FileSystemApi, zarrdir: str):
@@ -98,6 +109,7 @@ def write_data(
98109
voxel_spacing: List[Tuple[float, float, float]],
99110
chunk_size: Tuple[int, int, int] = (256, 256, 256),
100111
scale_z_axis: bool = True,
112+
store_labels_metadata: bool = False,
101113
):
102114
pyramid = []
103115
scales = []
@@ -111,6 +123,36 @@ def write_data(
111123
pyramid.append(d)
112124
scales.append(self.ome_zarr_transforms(vs))
113125

126+
# Store the labels contained in the data if the flag is activated
127+
if store_labels_metadata:
128+
129+
arr = data[0]
130+
131+
# t = time.perf_counter()
132+
labels = [int(label) for label in np.unique(arr[arr > 0])]
133+
# print(f"Time full image {time.perf_counter() - t:.3f}s {labels}")
134+
135+
# t = time.perf_counter()
136+
# sub = arr[::10, :, :]
137+
# labels = set(int(label) for label in np.unique(sub[sub > 0]))
138+
# sub = arr[:, ::10, :]
139+
# labels.update(int(label) for label in np.unique(sub[sub > 0]))
140+
# sub = arr[:, :, :10]
141+
# labels.update(int(label) for label in np.unique(sub[sub > 0]))
142+
# print(f"Time 10th slices {time.perf_counter() - t:.3f}s {list(labels)}")
143+
144+
# t = time.perf_counter()
145+
# sub = arr[::50, :, :]
146+
# labels = set(int(label) for label in np.unique(sub[sub > 0]))
147+
# sub = arr[:, ::50, :]
148+
# labels.update(int(label) for label in np.unique(sub[sub > 0]))
149+
# sub = arr[:, :, :50]
150+
# labels.update(int(label) for label in np.unique(sub[sub > 0]))
151+
# print(f"Time 50th slices {time.perf_counter() - t:.3f}s {list(labels)}")
152+
153+
label_values = [{"id": label, "label": f"{label}"} for label in labels]
154+
self.root_group.attrs["labels_metadata"] = {"version": "1.0", "labels": label_values}
155+
114156
# Write the pyramid to the zarr store
115157
return ome_zarr.writer.write_multiscale(
116158
pyramid,
@@ -345,12 +387,18 @@ def pyramid_to_omezarr(
345387
zarrdir: str,
346388
write: bool = True,
347389
pyramid_voxel_spacing: List[Tuple[float, float, float]] = None,
390+
store_labels_metadata: bool = False,
348391
) -> str:
349392
destination_zarrdir = fs.destformat(zarrdir)
350393
# Write zarr data as 256^3 voxel chunks
351394
if write:
352395
writer = ZarrWriter(fs, destination_zarrdir)
353-
writer.write_data(pyramid, voxel_spacing=pyramid_voxel_spacing, chunk_size=(256, 256, 256))
396+
writer.write_data(
397+
pyramid,
398+
voxel_spacing=pyramid_voxel_spacing,
399+
chunk_size=(256, 256, 256),
400+
store_labels_metadata=store_labels_metadata,
401+
)
354402
else:
355403
print(f"skipping remote push for {destination_zarrdir}")
356404
return os.path.basename(zarrdir)
@@ -463,7 +511,6 @@ def get_pyramid_base_data(self) -> np.ndarray:
463511
if not self.scale_0_dims:
464512
return self.scaled_data_transformation(data)
465513

466-
t = time.perf_counter()
467514
from scipy.ndimage import zoom
468515

469516
x, y, z = data.shape
@@ -479,7 +526,6 @@ def get_pyramid_base_data(self) -> np.ndarray:
479526
# )
480527

481528
rescaled = zoom(data, zoom=zoom_factor, order=0)
482-
print(f"Rescaled in {time.perf_counter() - t:.3f}")
483529

484530
return self.scaled_data_transformation(rescaled)
485531

@@ -570,6 +616,7 @@ def make_pyramids(
570616
f"{output_prefix}.zarr",
571617
write_zarr,
572618
pyramid_voxel_spacing=pyramid_voxel_spacing,
619+
store_labels_metadata=multilabels,
573620
)
574621
_ = tc.pyramid_to_mrc(fs, pyramid, f"{output_prefix}.mrc", write_mrc, header_mapper, voxel_spacing)
575622

ingestion_tools/scripts/importers/visualization_config.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -248,8 +248,13 @@ def _get_labels(self, path: str):
248248
import numpy as np
249249

250250
reader = ZarrReader(self.config.fs, segmentation_filename)
251-
arr = reader.get_data()
252-
return tuple(set(da.unique(arr[arr > 0]).compute().astype(np.integer)))
251+
try:
252+
labels_info = reader.attrs.get("labels_metadata")["labels"]
253+
labels = [label["id"] for label in labels_info]
254+
except KeyError:
255+
arr = reader.get_data()
256+
labels = set(da.unique(arr[arr > 0]).compute().astype(np.integer))
257+
return tuple(labels)
253258

254259
def _create_config(self, alignment_metadata_path: str) -> dict[str, Any]:
255260
tomogram = self.get_tomogram()

0 commit comments

Comments
 (0)