Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

silx.io.open: Added basic support for tiled URLs #4121

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
159 changes: 159 additions & 0 deletions src/silx/io/tiledh5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# /*##########################################################################
# Copyright (C) 2024 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""
Provides a wrapper to expose `Tiled <https://blueskyproject.io/tiled/>`_

This is a preview feature.
"""
from __future__ import annotations


from functools import lru_cache
import logging
import numpy
from . import commonh5
import h5py
import tiled.client
from tiled.client.cache import Cache


_logger = logging.getLogger(__name__)


def _get_children(
parent: TiledH5 | TiledGroup,
container: tiled.client.container.Container,
max_children: int | None = None,
):
"""Return first max_children entries of given container as a dict of commonh5 wrappers.

:param parent: The commonh5 wrapper for which to retrieve children.
:param container: The tiled container from which to retrieve the entries.
:param max_children: The maximum number of children to retrieve.
"""
items = container.items()

if max_children is not None and len(items) > max_children:
items = items.head(max_children)
_logger.warning(
f"{container.uri} contains too many entries: Only loading first {max_children}."
)

children = {}
for key, client in items:
if isinstance(client, tiled.client.container.Container):
children[key] = TiledGroup(client, name=key, parent=parent)
elif isinstance(client, tiled.client.array.ArrayClient):
children[key] = TiledDataset(client, name=key, parent=parent)
else:
_logger.warning(f"Unsupported child type: {key}: {client}")
children[key] = commonh5.Dataset(
key,
numpy.array("Unsupported", h5py.special_dtype(vlen=str)),
parent=parent,
)
return children


class TiledH5(commonh5.File):
"""tiled client wrapper"""

MAX_CHILDREN_PER_GROUP: int | None = None
"""Maximum number of children to instantiate for each group.

Set to None for allowing an unbound number of children per group.
"""

_cache = None
"""Shared tiled cache with lazy initialization"""

def __init__(
self,
name: str,
mode: str | None = None,
attrs: dict | None = None,
):
assert mode in ("r", None)
t20100 marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(name, mode, attrs)
if self._cache is None:
TiledH5._cache = Cache() # Use tiled cache default
t20100 marked this conversation as resolved.
Show resolved Hide resolved
self.__container = tiled.client.from_uri(name, cache=self._cache)
assert isinstance(self.__container, tiled.client.container.Container)
_logger.warning("tiled support is a preview feature: This may change or be removed without notice.")

def close(self):
super().close()
self.__container = None

@lru_cache
def _get_items(self):
return _get_children(self, self.__container, self.MAX_CHILDREN_PER_GROUP)


class TiledGroup(commonh5.Group):
"""tiled Container wrapper"""

def __init__(
self,
container: tiled.client.container.Container,
name: str,
parent: TiledH5 | TiledGroup | None = None,
attrs: dict | None = None,
):
super().__init__(name, parent, attrs)
self.__container = container

@lru_cache
def _get_items(self):
return _get_children(self, self.__container, self.file.MAX_CHILDREN_PER_GROUP)


class TiledDataset(commonh5.Dataset):
"""tiled ArrayClient wrapper"""

def __init__(
self,
client: tiled.client.array.ArrayClient,
name: str,
parent: TiledH5 | TiledGroup | None = None,
attrs: dict | None = None,
):
super().__init__(name, client, parent, attrs)

@property
def shape(self):
return self._get_data().shape

@property
def size(self):
return self._get_data().size

def __len__(self):
return len(self.__client)

def __getitem__(self, item):
return self._get_data()[item]

@property
def value(self):
return self._get_data()[()]
39 changes: 28 additions & 11 deletions src/silx/io/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# /*##########################################################################
# Copyright (C) 2016-2023 European Synchrotron Radiation Facility
# Copyright (C) 2016-2024 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,13 +33,11 @@
import time
import logging
from typing import Generator, Union, Optional
import urllib.parse

import numpy

from silx.utils.proxy import Proxy
from .url import DataUrl
from . import h5py_utils
from .._version import calc_hexversion

import h5py
Expand All @@ -51,6 +49,12 @@
except ImportError as e:
h5pyd = None

try:
from .tiledh5 import TiledH5
except ImportError as e:
TiledH5 = None


logger = logging.getLogger(__name__)

NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"]
Expand Down Expand Up @@ -693,17 +697,30 @@ def open(filename): # pylint:disable=redefined-builtin
:rtype: h5py-like node
"""
url = DataUrl(filename)
if url.scheme() in ("http", "https"):
errors = [f"Failed to open {filename}"]
if h5pyd is not None:
try:
return _open_url_with_h5pyd(filename)
except Exception as e:
errors.append(f"- h5pyd failed: {type(e)} {e}")

if url.scheme() in [None, "file", "silx"]:
# That's a local file
if not url.is_valid():
raise IOError("URL '%s' is not valid" % filename)
h5_file = _open_local_file(url.file_path())
elif url.scheme() in ("http", "https"):
return _open_url_with_h5pyd(filename)
else:
if TiledH5 is not None:
try:
return TiledH5(filename)
except Exception as e:
errors.append(f"- tiled failed: {type(e)} {e}")

raise IOError("\n".join(errors))

if url.scheme() not in (None, "file", "silx"):
raise IOError(f"Unsupported URL scheme {url.scheme}: {filename}")

# That's a local file
if not url.is_valid():
raise IOError("URL '%s' is not valid" % filename)
h5_file = _open_local_file(url.file_path())

if url.data_path() in [None, "/", ""]: # The full file is requested
if url.data_slice():
raise IOError(f"URL '{filename}' containing slicing is not supported")
Expand Down