diff --git a/docs/index.md b/docs/index.md index 52f39e7..5f7d71e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,7 +12,8 @@ Utilities for interacting with object storage, based on [obspec](https://github. [`EagerStoreReader`][obspec_utils.readers.EagerStoreReader], [`BlockStoreReader`][obspec_utils.readers.BlockStoreReader]) for reading from object stores - **`obspec_utils.stores`**: Alternative store implementations (e.g., [`AiohttpStore`][obspec_utils.stores.AiohttpStore] for generic HTTP access) -- **`obspec_utils.wrappers`**: Composable store wrappers for caching, tracing, and concurrent fetching +- **`obspec_utils.wrappers`**: Composable store wrappers for [caching][obspec_utils.wrappers.CachingReadableStore], + [tracing][obspec_utils.wrappers.TracingReadableStore], and [concurrent fetching][obspec_utils.wrappers.SplittingReadableStore] - **`obspec_utils.registry`**: [`ObjectStoreRegistry`][obspec_utils.registry.ObjectStoreRegistry] for managing multiple stores and resolving URLs ## Design Philosophy diff --git a/pyproject.toml b/pyproject.toml index b34ee23..45763f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ xarray = [ "h5netcdf", "h5py", "cftime", + "scipy", ] fsspec = [ "s3fs", @@ -101,3 +102,8 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] + +[tool.pytest.ini_options] +markers = [ + "network: marks tests as requiring network access (run with --network)", +] diff --git a/src/obspec_utils/readers/_block.py b/src/obspec_utils/readers/_block.py index d99b303..4ed9801 100644 --- a/src/obspec_utils/readers/_block.py +++ b/src/obspec_utils/readers/_block.py @@ -87,6 +87,7 @@ def __init__( self._max_cached_blocks = max_cached_blocks self._position = 0 self._size: int | None = None + self._closed = False # LRU cache: OrderedDict with block_index -> bytes self._cache: OrderedDict[int, bytes] = OrderedDict() @@ -242,9 +243,27 @@ def tell(self) -> int: """ return self._position + @property + def closed(self) -> bool: + """Return True if the reader has been closed.""" + return self._closed + + def readable(self) -> bool: + """Return True, indicating this reader supports reading.""" + return True + + def seekable(self) -> bool: + """Return True, indicating this reader supports seeking.""" + return True + + def writable(self) -> bool: + """Return False, indicating this reader does not support writing.""" + return False + def close(self) -> None: """Close the reader and release the block cache.""" self._cache.clear() + self._closed = True def __enter__(self) -> "BlockStoreReader": """Enter the context manager.""" diff --git a/src/obspec_utils/readers/_buffered.py b/src/obspec_utils/readers/_buffered.py index 4ef23df..4c96272 100644 --- a/src/obspec_utils/readers/_buffered.py +++ b/src/obspec_utils/readers/_buffered.py @@ -74,6 +74,7 @@ def __init__( self._buffer_size = buffer_size self._position = 0 self._size: int | None = None + self._closed = False # Read-ahead buffer self._buffer = b"" self._buffer_start = 0 @@ -194,10 +195,28 @@ def tell(self) -> int: """ return self._position + @property + def closed(self) -> bool: + """Return True if the reader has been closed.""" + return self._closed + + def readable(self) -> bool: + """Return True, indicating this reader supports reading.""" + return True + + def seekable(self) -> bool: + """Return True, indicating this reader supports seeking.""" + return True + + def writable(self) -> bool: + """Return False, indicating this reader does not support writing.""" + return False + def close(self) -> None: """Close the reader and release the read-ahead buffer.""" self._buffer = b"" self._buffer_start = 0 + self._closed = True def __enter__(self) -> "BufferedStoreReader": """Enter the context manager.""" diff --git a/src/obspec_utils/readers/_eager.py b/src/obspec_utils/readers/_eager.py index fba0539..4154701 100644 --- a/src/obspec_utils/readers/_eager.py +++ b/src/obspec_utils/readers/_eager.py @@ -95,6 +95,7 @@ def __init__( """ self._store = store self._path = path + self._closed = False # Determine file size if not provided if file_size is None: @@ -155,9 +156,27 @@ def tell(self) -> int: """Return the current position in the cached file.""" return self._buffer.tell() + @property + def closed(self) -> bool: + """Return True if the reader has been closed.""" + return self._closed + + def readable(self) -> bool: + """Return True, indicating this reader supports reading.""" + return True + + def seekable(self) -> bool: + """Return True, indicating this reader supports seeking.""" + return True + + def writable(self) -> bool: + """Return False, indicating this reader does not support writing.""" + return False + def close(self) -> None: """Close the reader and release the in-memory buffer.""" self._buffer = io.BytesIO(b"") + self._closed = True def __enter__(self) -> "EagerStoreReader": """Enter the context manager.""" diff --git a/tests/conftest.py b/tests/conftest.py index 675e28e..8b74e2c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,25 @@ import xarray as xr +def pytest_addoption(parser): + parser.addoption( + "--network", + action="store_true", + default=False, + help="run tests that require network access", + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--network"): + # --network given: do not skip network tests + return + skip_network = pytest.mark.skip(reason="need --network option to run") + for item in items: + if "network" in item.keywords: + item.add_marker(skip_network) + + @pytest.fixture(scope="session") def container(): import docker diff --git a/tests/test_glob_osn.py b/tests/test_glob_osn.py index d7d8be5..c68785a 100644 --- a/tests/test_glob_osn.py +++ b/tests/test_glob_osn.py @@ -3,14 +3,12 @@ These tests hit the USGS OSN public endpoint and require network access. -To run these tests, set the environment variable: - RUN_REAL_WORLD_TESTS=1 uv run --all-groups pytest tests/test_real_world.py -v +To run these tests: + uv run --all-groups pytest tests/test_glob_osn.py -v --network """ from __future__ import annotations -import os - import pytest import s3fs @@ -18,11 +16,8 @@ from obstore.store import S3Store -# Skip all tests in this module unless RUN_REAL_WORLD_TESTS is set -pytestmark = pytest.mark.skipif( - not os.environ.get("RUN_REAL_WORLD_TESTS"), - reason="Set RUN_REAL_WORLD_TESTS=1 to run real-world S3 tests", -) +# Skip all tests in this module unless --network is passed +pytestmark = pytest.mark.network STORAGE_ENDPOINT = "https://usgs.osn.mghpcc.org" diff --git a/tests/test_readers.py b/tests/test_readers.py index 0a65d20..5642d68 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -560,3 +560,66 @@ def test_old_parameters_work(self): def test_is_subclass_of_block_store_reader(self): """ParallelStoreReader should be a subclass of BlockStoreReader.""" assert issubclass(ParallelStoreReader, BlockStoreReader) + + +# ============================================================================= +# HTTP Store / xarray integration tests +# ============================================================================= + + +@pytest.mark.parametrize("ReaderClass", ALL_READERS) +def test_reader_closed_property(ReaderClass): + """Test that readers have a closed property for file-like compatibility.""" + memstore = MemoryStore() + memstore.put("test.txt", b"hello world") + + reader = ReaderClass(memstore, "test.txt") + assert reader.closed is False + + reader.close() + assert reader.closed is True + + +@pytest.mark.parametrize("ReaderClass", ALL_READERS) +def test_reader_closed_property_with_context_manager(ReaderClass): + """Test that closed property is True after exiting context manager.""" + memstore = MemoryStore() + memstore.put("test.txt", b"hello world") + + with ReaderClass(memstore, "test.txt") as reader: + assert reader.closed is False + + assert reader.closed is True + + +@pytest.mark.parametrize("ReaderClass", ALL_READERS) +def test_reader_readable(ReaderClass): + """Test that readers report as readable.""" + memstore = MemoryStore() + memstore.put("test.txt", b"hello world") + + reader = ReaderClass(memstore, "test.txt") + assert reader.readable() is True + reader.close() + + +@pytest.mark.parametrize("ReaderClass", ALL_READERS) +def test_reader_seekable(ReaderClass): + """Test that readers report as seekable.""" + memstore = MemoryStore() + memstore.put("test.txt", b"hello world") + + reader = ReaderClass(memstore, "test.txt") + assert reader.seekable() is True + reader.close() + + +@pytest.mark.parametrize("ReaderClass", ALL_READERS) +def test_reader_writable(ReaderClass): + """Test that readers report as not writable.""" + memstore = MemoryStore() + memstore.put("test.txt", b"hello world") + + reader = ReaderClass(memstore, "test.txt") + assert reader.writable() is False + reader.close() diff --git a/tests/test_xarray.py b/tests/test_xarray.py index 7b23638..a362c83 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -18,3 +18,37 @@ def test_reader_with_xarray(local_netcdf4_file, ReaderClass) -> None: reader = ReaderClass(store=LocalStore(), path=local_netcdf4_file) ds_obstore = xr.open_dataset(reader, engine="h5netcdf") xr.testing.assert_allclose(ds_fsspec, ds_obstore) + + +@pytest.mark.network +def test_eager_reader_xarray_http_store(): + """ + Regression test: EagerStoreReader works with HTTPStore and xarray. + + This tests that: + 1. HTTPStore can fetch remote files + 2. EagerStoreReader correctly buffers the data + 3. The reader has the 'closed' property required by scipy/xarray + 4. xarray can open the dataset using the scipy engine + """ + pytest.importorskip("xarray") + import xarray as xr + from obstore.store import HTTPStore + + store = HTTPStore.from_url( + "https://raw.githubusercontent.com/pydata/xarray-data/refs/heads/master/" + ) + with EagerStoreReader(store, "air_temperature.nc") as reader: + # Verify reader has data + assert len(reader._buffer.getvalue()) > 0 + + # Verify closed property exists and is False + assert reader.closed is False + + # Open with xarray (scipy engine for NetCDF Classic format) + ds = xr.open_dataset(reader, engine="scipy") + assert "air" in ds.data_vars + ds.close() + + # Verify closed is True after context exit + assert reader.closed is True