Skip to content

Commit 5ecb5fe

Browse files
authored Aug 15, 2024
Merge pull request #180 from NREL/pp/multi_time_from_list
Wildcard support for list input to `MultiTimeResource`
2 parents a96af89 + a569c25 commit 5ecb5fe

File tree

4 files changed

+111
-28
lines changed

4 files changed

+111
-28
lines changed
 

‎rex/multi_time_resource.py

+26 -19
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
Classes to handle resource data stored over multiple files
44
"""
55
import os
6-
from fnmatch import fnmatch
76
from glob import glob
7+
from itertools import chain
8+
from fnmatch import fnmatch
89

910
import numpy as np
1011
import pandas as pd
@@ -33,8 +34,9 @@ def __init__(self, h5_path, res_cls=Resource, hsds=False, hsds_kwargs=None,
3334
h5_path : str | list
3435
Unix shell style pattern path with * wildcards to multi-file
3536
resource file sets. Files must have the same coordinates
36-
but can have different datasets or time indexes. Can also be an
37-
explicit list of multi time files.
37+
but can have different datasets or time indexes. Can also be
38+
an explicit list of multi time files, which themselves can
39+
contain * wildcards.
3840
res_cls : obj
3941
Resource class to use to open and access resource data
4042
hsds : bool
@@ -259,8 +261,9 @@ def _get_file_paths(cls, h5_path, hsds=False, hsds_kwargs=None):
259261
h5_path : str | list
260262
Unix shell style pattern path with * wildcards to multi-file
261263
resource file sets. Files must have the same coordinates
262-
but can have different datasets or time indexes. Can also be an
263-
explicit list of multi time files.
264+
but can have different datasets or time indexes. Can also be
265+
an explicit list of multi time files, which themselves can
266+
contain * wildcards.
264267
hsds : bool
265268
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
266269
behind HSDS
@@ -278,10 +281,9 @@ def _get_file_paths(cls, h5_path, hsds=False, hsds_kwargs=None):
278281
file_paths = cls._get_hsds_file_paths(h5_path,
279282
hsds_kwargs=hsds_kwargs)
280283
elif isinstance(h5_path, (list, tuple)):
281-
for fp in h5_path:
282-
msg = 'Does not exist: {}'.format(fp)
283-
assert os.path.exists(fp), msg
284-
file_paths = h5_path
284+
file_paths = list(chain.from_iterable(glob(fp) for fp in h5_path))
285+
for fp in file_paths:
286+
assert os.path.exists(fp), 'Does not exist: {}'.format(fp)
285287
elif os.path.isdir(h5_path):
286288
msg = ('h5_path must be a unix shell style pattern with '
287289
'wildcard * in order to find files, but received '
@@ -493,8 +495,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True,
493495
h5_path : str | list
494496
Unix shell style pattern path with * wildcards to multi-file
495497
resource file sets. Files must have the same coordinates
496-
but can have different datasets or time indexes. Can also be an
497-
explicit list of multi time files.
498+
but can have different datasets or time indexes. Can also be
499+
an explicit list of multi time files, which themselves can
500+
contain * wildcards.
498501
unscale : bool
499502
Boolean flag to automatically unscale variables on extraction
500503
str_decode : bool
@@ -850,8 +853,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
850853
h5_path : str | list
851854
Unix shell style pattern path with * wildcards to multi-file
852855
resource file sets. Files must have the same coordinates
853-
but can have different datasets or time indexes. Can also be an
854-
explicit list of multi time files.
856+
but can have different datasets or time indexes. Can also be
857+
an explicit list of multi time files, which themselves can
858+
contain * wildcards.
855859
unscale : bool
856860
Boolean flag to automatically unscale variables on extraction
857861
str_decode : bool
@@ -885,8 +889,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
885889
h5_path : str | list
886890
Unix shell style pattern path with * wildcards to multi-file
887891
resource file sets. Files must have the same coordinates
888-
but can have different datasets or time indexes. Can also be an
889-
explicit list of multi time files.
892+
but can have different datasets or time indexes. Can also be
893+
an explicit list of multi time files, which themselves can
894+
contain * wildcards.
890895
unscale : bool
891896
Boolean flag to automatically unscale variables on extraction
892897
str_decode : bool
@@ -920,8 +925,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
920925
h5_path : str | list
921926
Unix shell style pattern path with * wildcards to multi-file
922927
resource file sets. Files must have the same coordinates
923-
but can have different datasets or time indexes. Can also be an
924-
explicit list of multi time files.
928+
but can have different datasets or time indexes. Can also be
929+
an explicit list of multi time files, which themselves can
930+
contain * wildcards.
925931
unscale : bool
926932
Boolean flag to automatically unscale variables on extraction
927933
str_decode : bool
@@ -953,8 +959,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
953959
h5_path : str | list
954960
Unix shell style pattern path with * wildcards to multi-file
955961
resource file sets. Files must have the same coordinates
956-
but can have different datasets or time indexes. Can also be an
957-
explicit list of multi time files.
962+
but can have different datasets or time indexes. Can also be
963+
an explicit list of multi time files, which themselves can
964+
contain * wildcards.
958965
unscale : bool
959966
Boolean flag to automatically unscale variables on extraction
960967
str_decode : bool

‎rex/multi_year_resource.py

+16 -8
Original file line number | Diff line number | Diff line change
@@ -411,10 +411,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
411411
"""
412412
Parameters
413413
----------
414-
h5_path : str
414+
h5_path : str | list
415415
Unix shell style pattern path with * wildcards to multi-file
416416
resource file sets. Files must have the same coordinates
417-
but can have different datasets or time indexes.
417+
but can have different datasets or time indexes. Can also be
418+
an explicit list of multi time files, which themselves can
419+
contain * wildcards.
418420
years : list, optional
419421
List of years to access, by default None
420422
unscale : bool
@@ -498,10 +500,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
498500
"""
499501
Parameters
500502
----------
501-
h5_path : str
503+
h5_path : str | list
502504
Unix shell style pattern path with * wildcards to multi-file
503505
resource file sets. Files must have the same coordinates
504-
but can have different datasets or time indexes.
506+
but can have different datasets or time indexes. Can also be
507+
an explicit list of multi time files, which themselves can
508+
contain * wildcards.
505509
years : list, optional
506510
List of years to access, by default None
507511
unscale : bool
@@ -532,10 +536,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
532536
"""
533537
Parameters
534538
----------
535-
h5_path : str
539+
h5_path : str | list
536540
Unix shell style pattern path with * wildcards to multi-file
537541
resource file sets. Files must have the same coordinates
538-
but can have different datasets or time indexes.
542+
but can have different datasets or time indexes. Can also be
543+
an explicit list of multi time files, which themselves can
544+
contain * wildcards.
539545
years : list, optional
540546
List of years to access, by default None
541547
unscale : bool
@@ -566,10 +572,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
566572
"""
567573
Parameters
568574
----------
569-
h5_path : str
575+
h5_path : str | list
570576
Unix shell style pattern path with * wildcards to multi-file
571577
resource file sets. Files must have the same coordinates
572-
but can have different datasets or time indexes.
578+
but can have different datasets or time indexes. Can also be
579+
an explicit list of multi time files, which themselves can
580+
contain * wildcards.
573581
years : list, optional
574582
List of years to access, by default None
575583
unscale : bool

‎rex/version.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
"""rex Version number"""
22

3-
__version__ = "0.2.88"
3+
__version__ = "0.2.89"

‎tests/test_multi_time_resource.py

+68
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,16 @@ def MultiTimeNSRDB_list_res():
3535
return MultiTimeNSRDB(files)
3636

3737

38+
@pytest.fixture
39+
def MultiTimeNSRDB_wildcard_list_res():
40+
"""
41+
Init NSRDB resource handler
42+
"""
43+
files = [os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_20*.h5')]
44+
45+
return MultiTimeNSRDB(files)
46+
47+
3848
@pytest.fixture
3949
def MultiTimeWind_res():
4050
"""
@@ -258,6 +268,49 @@ def test_ds(MultiTimeNSRDB_list_res, ds_name='dni'):
258268
MultiTimeNSRDB_list_res.close()
259269

260270

271+
class TestMultiTimeWildcardList:
272+
"""
273+
Test multi time resource handler from list of files with wildcards
274+
"""
275+
@staticmethod
276+
def test_res(MultiTimeNSRDB_wildcard_list_res):
277+
"""
278+
test NSRDB class calls
279+
"""
280+
check_res(MultiTimeNSRDB_wildcard_list_res)
281+
assert len(MultiTimeNSRDB_wildcard_list_res.h5.files) >= 2
282+
MultiTimeNSRDB_wildcard_list_res.close()
283+
284+
@staticmethod
285+
def test_meta(MultiTimeNSRDB_wildcard_list_res):
286+
"""
287+
test extraction of NSRDB meta data
288+
"""
289+
check_meta(MultiTimeNSRDB_wildcard_list_res)
290+
assert len(MultiTimeNSRDB_wildcard_list_res.h5.files) >= 2
291+
MultiTimeNSRDB_wildcard_list_res.close()
292+
293+
@staticmethod
294+
def test_time_index(MultiTimeNSRDB_wildcard_list_res):
295+
"""
296+
test extraction of NSRDB time_index
297+
"""
298+
check_time_index(MultiTimeNSRDB_wildcard_list_res)
299+
assert len(MultiTimeNSRDB_wildcard_list_res.h5.files) >= 2
300+
MultiTimeNSRDB_wildcard_list_res.close()
301+
302+
@staticmethod
303+
def test_ds(MultiTimeNSRDB_wildcard_list_res, ds_name='dni'):
304+
"""
305+
test extraction of a variable array, attributes, and properties
306+
"""
307+
check_dset(MultiTimeNSRDB_wildcard_list_res, ds_name)
308+
check_attrs(MultiTimeNSRDB_wildcard_list_res, ds_name)
309+
check_properties(MultiTimeNSRDB_wildcard_list_res, ds_name)
310+
assert len(MultiTimeNSRDB_wildcard_list_res.h5.files) >= 2
311+
MultiTimeNSRDB_wildcard_list_res.close()
312+
313+
261314
class TestMultiTimeWindResource:
262315
"""
263316
Multi Year WindResource Resource handler tests
@@ -323,6 +376,21 @@ def test_map_hsds_files():
323376
assert not any(wrong), 'Wrong files: {}'.format(wrong)
324377

325378

379+
def test_multi_time_resource_acts_like_resource_single_file():
380+
"""Test that MultiTimeWindResource behaves like Resource for one file."""
381+
382+
path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
383+
384+
with Resource(path) as res, MultiTimeWindResource([path]) as mt_res:
385+
assert set(res.datasets) == set(mt_res.datasets)
386+
assert (res.time_index == mt_res.time_index).all()
387+
assert res.shape == mt_res.shape
388+
for ds in res.datasets:
389+
if any(kw in ds for kw in ['meta', 'time']):
390+
continue
391+
assert np.allclose(res[ds], mt_res[ds])
392+
393+
326394
@pytest.mark.timeout(10)
327395
def test_mt_iterator():
328396
"""

0 commit comments

Comments
 (0)
Please sign in to comment.