Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ Utilities for interacting with object storage, based on [obspec](https://github.
[`EagerStoreReader`][obspec_utils.readers.EagerStoreReader], [`BlockStoreReader`][obspec_utils.readers.BlockStoreReader])
for reading from object stores
- **`obspec_utils.stores`**: Alternative store implementations (e.g., [`AiohttpStore`][obspec_utils.stores.AiohttpStore] for generic HTTP access)
- **`obspec_utils.wrappers`**: Composable store wrappers for caching, tracing, and concurrent fetching
- **`obspec_utils.wrappers`**: Composable store wrappers for [caching][obspec_utils.wrappers.CachingReadableStore],
[tracing][obspec_utils.wrappers.TracingReadableStore], and [concurrent fetching][obspec_utils.wrappers.SplittingReadableStore]
- **`obspec_utils.registry`**: [`ObjectStoreRegistry`][obspec_utils.registry.ObjectStoreRegistry] for managing multiple stores and resolving URLs

## Design Philosophy
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ xarray = [
"h5netcdf",
"h5py",
"cftime",
"scipy",
]
fsspec = [
"s3fs",
Expand Down Expand Up @@ -101,3 +102,8 @@ exclude_lines = [
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.pytest.ini_options]
markers = [
"network: marks tests as requiring network access (run with --network)",
]
19 changes: 19 additions & 0 deletions src/obspec_utils/readers/_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def __init__(
self._max_cached_blocks = max_cached_blocks
self._position = 0
self._size: int | None = None
self._closed = False
# LRU cache: OrderedDict with block_index -> bytes
self._cache: OrderedDict[int, bytes] = OrderedDict()

Expand Down Expand Up @@ -242,9 +243,27 @@ def tell(self) -> int:
"""
return self._position

@property
def closed(self) -> bool:
"""Return True if the reader has been closed."""
return self._closed

def readable(self) -> bool:
"""Return True, indicating this reader supports reading."""
return True

def seekable(self) -> bool:
"""Return True, indicating this reader supports seeking."""
return True

def writable(self) -> bool:
"""Return False, indicating this reader does not support writing."""
return False

def close(self) -> None:
"""Close the reader and release the block cache."""
self._cache.clear()
self._closed = True

def __enter__(self) -> "BlockStoreReader":
"""Enter the context manager."""
Expand Down
19 changes: 19 additions & 0 deletions src/obspec_utils/readers/_buffered.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def __init__(
self._buffer_size = buffer_size
self._position = 0
self._size: int | None = None
self._closed = False
# Read-ahead buffer
self._buffer = b""
self._buffer_start = 0
Expand Down Expand Up @@ -194,10 +195,28 @@ def tell(self) -> int:
"""
return self._position

@property
def closed(self) -> bool:
"""Return True if the reader has been closed."""
return self._closed

def readable(self) -> bool:
"""Return True, indicating this reader supports reading."""
return True

def seekable(self) -> bool:
"""Return True, indicating this reader supports seeking."""
return True

def writable(self) -> bool:
"""Return False, indicating this reader does not support writing."""
return False

def close(self) -> None:
"""Close the reader and release the read-ahead buffer."""
self._buffer = b""
self._buffer_start = 0
self._closed = True

def __enter__(self) -> "BufferedStoreReader":
"""Enter the context manager."""
Expand Down
19 changes: 19 additions & 0 deletions src/obspec_utils/readers/_eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def __init__(
"""
self._store = store
self._path = path
self._closed = False

# Determine file size if not provided
if file_size is None:
Expand Down Expand Up @@ -155,9 +156,27 @@ def tell(self) -> int:
"""Return the current position in the cached file."""
return self._buffer.tell()

@property
def closed(self) -> bool:
"""Return True if the reader has been closed."""
return self._closed

def readable(self) -> bool:
"""Return True, indicating this reader supports reading."""
return True

def seekable(self) -> bool:
"""Return True, indicating this reader supports seeking."""
return True

def writable(self) -> bool:
"""Return False, indicating this reader does not support writing."""
return False

def close(self) -> None:
"""Close the reader and release the in-memory buffer."""
self._buffer = io.BytesIO(b"")
self._closed = True

def __enter__(self) -> "EagerStoreReader":
"""Enter the context manager."""
Expand Down
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@
import xarray as xr


def pytest_addoption(parser):
parser.addoption(
"--network",
action="store_true",
default=False,
help="run tests that require network access",
)


def pytest_collection_modifyitems(config, items):
if config.getoption("--network"):
# --network given: do not skip network tests
return
skip_network = pytest.mark.skip(reason="need --network option to run")
for item in items:
if "network" in item.keywords:
item.add_marker(skip_network)


@pytest.fixture(scope="session")
def container():
import docker
Expand Down
13 changes: 4 additions & 9 deletions tests/test_glob_osn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,21 @@

These tests hit the USGS OSN public endpoint and require network access.

To run these tests, set the environment variable:
RUN_REAL_WORLD_TESTS=1 uv run --all-groups pytest tests/test_real_world.py -v
To run these tests:
uv run --all-groups pytest tests/test_glob_osn.py -v --network
"""

from __future__ import annotations

import os

import pytest
import s3fs

from obspec_utils import glob
from obstore.store import S3Store


# Skip all tests in this module unless RUN_REAL_WORLD_TESTS is set
pytestmark = pytest.mark.skipif(
not os.environ.get("RUN_REAL_WORLD_TESTS"),
reason="Set RUN_REAL_WORLD_TESTS=1 to run real-world S3 tests",
)
# Skip all tests in this module unless --network is passed
pytestmark = pytest.mark.network


STORAGE_ENDPOINT = "https://usgs.osn.mghpcc.org"
Expand Down
63 changes: 63 additions & 0 deletions tests/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,3 +560,66 @@ def test_old_parameters_work(self):
def test_is_subclass_of_block_store_reader(self):
"""ParallelStoreReader should be a subclass of BlockStoreReader."""
assert issubclass(ParallelStoreReader, BlockStoreReader)


# =============================================================================
# HTTP Store / xarray integration tests
# =============================================================================


@pytest.mark.parametrize("ReaderClass", ALL_READERS)
def test_reader_closed_property(ReaderClass):
"""Test that readers have a closed property for file-like compatibility."""
memstore = MemoryStore()
memstore.put("test.txt", b"hello world")

reader = ReaderClass(memstore, "test.txt")
assert reader.closed is False

reader.close()
assert reader.closed is True


@pytest.mark.parametrize("ReaderClass", ALL_READERS)
def test_reader_closed_property_with_context_manager(ReaderClass):
"""Test that closed property is True after exiting context manager."""
memstore = MemoryStore()
memstore.put("test.txt", b"hello world")

with ReaderClass(memstore, "test.txt") as reader:
assert reader.closed is False

assert reader.closed is True


@pytest.mark.parametrize("ReaderClass", ALL_READERS)
def test_reader_readable(ReaderClass):
"""Test that readers report as readable."""
memstore = MemoryStore()
memstore.put("test.txt", b"hello world")

reader = ReaderClass(memstore, "test.txt")
assert reader.readable() is True
reader.close()


@pytest.mark.parametrize("ReaderClass", ALL_READERS)
def test_reader_seekable(ReaderClass):
"""Test that readers report as seekable."""
memstore = MemoryStore()
memstore.put("test.txt", b"hello world")

reader = ReaderClass(memstore, "test.txt")
assert reader.seekable() is True
reader.close()


@pytest.mark.parametrize("ReaderClass", ALL_READERS)
def test_reader_writable(ReaderClass):
"""Test that readers report as not writable."""
memstore = MemoryStore()
memstore.put("test.txt", b"hello world")

reader = ReaderClass(memstore, "test.txt")
assert reader.writable() is False
reader.close()
34 changes: 34 additions & 0 deletions tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,37 @@ def test_reader_with_xarray(local_netcdf4_file, ReaderClass) -> None:
reader = ReaderClass(store=LocalStore(), path=local_netcdf4_file)
ds_obstore = xr.open_dataset(reader, engine="h5netcdf")
xr.testing.assert_allclose(ds_fsspec, ds_obstore)


@pytest.mark.network
def test_eager_reader_xarray_http_store():
"""
Regression test: EagerStoreReader works with HTTPStore and xarray.

This tests that:
1. HTTPStore can fetch remote files
2. EagerStoreReader correctly buffers the data
3. The reader has the 'closed' property required by scipy/xarray
4. xarray can open the dataset using the scipy engine
"""
pytest.importorskip("xarray")
import xarray as xr
from obstore.store import HTTPStore

store = HTTPStore.from_url(
"https://raw.githubusercontent.com/pydata/xarray-data/refs/heads/master/"
)
with EagerStoreReader(store, "air_temperature.nc") as reader:
# Verify reader has data
assert len(reader._buffer.getvalue()) > 0

# Verify closed property exists and is False
assert reader.closed is False

# Open with xarray (scipy engine for NetCDF Classic format)
ds = xr.open_dataset(reader, engine="scipy")
assert "air" in ds.data_vars
ds.close()

# Verify closed is True after context exit
assert reader.closed is True
Loading