-
Notifications
You must be signed in to change notification settings - Fork 379
Make HGroup more robust to changes in h5py.Group #5284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
GarethCabournDavies
merged 10 commits into
gwastro:master
from
GarethCabournDavies:update_HFile
Mar 18, 2026
+432
−22
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
cc00cd8
Make HGroup more robust to changes in h5py.Group
GarethCabournDavies 18d7544
use try/except rather than complicated logic
GarethCabournDavies 2596b52
Add reasoning why this check is in place
GarethCabournDavies c1d896f
Run black on the new test scripts
GarethCabournDavies a1a1a71
Add debug log
GarethCabournDavies a6a65cd
TypeError can also be raised
GarethCabournDavies a0db45e
Rename stuff so that if alternative checksum methods are introduced, …
GarethCabournDavies 15db2a7
thinko
GarethCabournDavies bd18440
Merge branch 'gwastro:master' into update_HFile
GarethCabournDavies f730a99
I didn't think this would work but it has
GarethCabournDavies File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| import os | ||
| import unittest | ||
| import tempfile | ||
| import numpy as np | ||
| from utils import simple_exit, parse_args_cpu_only | ||
| from pycbc.io.hdf import HFile | ||
|
|
||
|
GarethCabournDavies marked this conversation as resolved.
|
||
# Parse the standard test-suite command-line arguments for the "io.hdf"
# scheme; this helper restricts the run to CPU-only configurations.
parse_args_cpu_only("io.hdf")
|
|
||
|
|
||
class TestIOHDF(unittest.TestCase):
    """Unit tests for pycbc.io.hdf helpers: HFile.select, FileData,
    DictArray, and DataFromFiles."""

    def test_hfile_select_basic_and_premask(self):
        """HFile.select: plain selection plus boolean and index premasks."""
        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(workdir, "select.hdf")
            with HFile(path, "w") as hf:
                hf.create_dataset("x", data=np.arange(10, dtype=np.int64))
                hf.create_dataset("y", data=np.arange(10, dtype=np.int64) * 2)

            with HFile(path, "r") as hf:
                # Plain selection: keep entries with x > 5.
                indices, (xvals,) = hf.select(lambda x: x > 5, "x")
                np.testing.assert_array_equal(
                    indices, np.flatnonzero(np.arange(10) > 5)
                )
                np.testing.assert_array_equal(xvals, np.arange(6, 10))

                # Boolean premask allowing only the first five entries;
                # of those, x > 1 survives -> global indices 2, 3, 4.
                bool_mask = np.zeros(10, dtype=bool)
                bool_mask[:5] = True
                kept, _ = hf.select(lambda x: x > 1, "x", premask=bool_mask)
                np.testing.assert_array_equal(
                    kept, np.array([2, 3, 4], dtype=np.uint64)
                )

                # Index-array premask: restrict to indices 7, 8, 9, then
                # require x > 7, leaving global indices 8 and 9.
                idx_mask = np.array([7, 8, 9], dtype=int)
                kept2, _ = hf.select(lambda x: x > 7, "x", premask=idx_mask)
                np.testing.assert_array_equal(
                    kept2, np.array([8, 9], dtype=np.uint64)
                )

    def test_hfile_select_mismatched_lengths_raises(self):
        """HFile.select raises RuntimeError for datasets of unequal length."""
        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(workdir, "badlen.hdf")
            with HFile(path, "w") as hf:
                hf.create_dataset("a", data=np.arange(5))
                hf.create_dataset("b", data=np.arange(6))

            with HFile(path, "r") as hf:
                with self.assertRaises(RuntimeError):
                    hf.select(lambda a, b: a > 0, "a", "b")

    def test_filedata_mask_and_get_column(self):
        """FileData.mask and get_column driven by a simple filter_func."""
        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(workdir, "filedata.hdf")
            # A single top-level group lets FileData auto-select it.
            with HFile(path, "w") as hf:
                group = hf.create_group("grp")
                group.create_dataset("a", data=np.arange(8))
                group.create_dataset("b", data=np.arange(8) * 10)

            # Imported here to exercise the class from the module under test.
            from pycbc.io.hdf import FileData as FD

            fdata = FD(path)

            # Accessing mask before a filter_func is set must raise.
            with self.assertRaises(RuntimeError):
                _ = fdata.mask

            # Install a filter referencing column 'a', then read back.
            fdata.filter_func = "self.a > 4"
            mask = fdata.mask
            self.assertTrue(isinstance(mask, np.ndarray))
            self.assertEqual(mask.dtype, bool)
            column = fdata.get_column("a")
            # Only values passing the filter (> 4) should remain.
            np.testing.assert_array_equal(column, np.array([5, 6, 7]))

    def test_dictarray_save_and_reload(self):
        """DictArray.save writes datasets that round-trip through HFile."""
        from pycbc.io.hdf import DictArray

        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(workdir, "dictarray.hdf")
            payload = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])}
            darr = DictArray(data=payload)
            # save() requires an attrs mapping to be present.
            darr.attrs = {"test": "yes"}
            darr.save(path)

            # Reopen the file and confirm datasets and attrs survived.
            with HFile(path, "r") as hf:
                np.testing.assert_array_equal(hf["a"][:], payload["a"])
                np.testing.assert_array_equal(hf["b"][:], payload["b"])
                self.assertIn("test", hf.attrs)

    def test_datafromfiles_get_column_concat(self):
        """DataFromFiles concatenates a column across multiple files."""
        from pycbc.io.hdf import DataFromFiles

        with tempfile.TemporaryDirectory() as workdir:
            first = os.path.join(workdir, "f1.hdf")
            second = os.path.join(workdir, "f2.hdf")

            # Two files, each with the same single top-level group 'grp'.
            with HFile(first, "w") as hf:
                hf.create_group("grp").create_dataset(
                    "val", data=np.array([1, 2, 3])
                )
            with HFile(second, "w") as hf:
                hf.create_group("grp").create_dataset(
                    "val", data=np.array([4, 5])
                )

            reader = DataFromFiles([first, second], group="grp")
            np.testing.assert_array_equal(
                reader.get_column("val"), np.array([1, 2, 3, 4, 5])
            )
|
|
||
|
|
||
# Collect all TestIOHDF cases into a suite; when executed directly, run
# them and hand the results to simple_exit so the harness sees the status.
suite = unittest.TestSuite()
suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestIOHDF))


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    results = runner.run(suite)
    simple_exit(results)
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.