Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not check for monotonic increase of timestamps in validation by default #503

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion extra_data/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,17 @@ def test_control_data_timestamps(data_aggregator_file):
ts[10] = 5

with raises(ValidationError) as excinfo:
FileValidator(FileAccess(data_aggregator_file)).validate()
FileValidator(FileAccess(data_aggregator_file), timestamps_should_increase=True).validate()
problem = excinfo.value.problems.pop()
assert problem['msg'] == 'Timestamp is decreasing, e.g. at 10 (5 < 10)'
assert problem['dataset'] == 'CONTROL/SA1_XTD2_XGM/DOOCS/MAIN/pulseEnergy/photonFlux/timestamp'
assert 'RAW-R0450-DA01-S00001.h5' in problem['file']

# Second case (the default): timestamp ordering is not checked, so no problems are reported
validator = FileValidator(FileAccess(data_aggregator_file))
validator.validate()
assert len(validator.problems) == 0


def test_main_file_non_h5(tmp_path, capsys):
not_h5 = tmp_path / 'notHDF5.h5'
Expand Down
26 changes: 18 additions & 8 deletions extra_data/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@


class FileValidator:
def __init__(self, file: FileAccess):
def __init__(self, file: FileAccess, timestamps_should_increase: bool = False):
self.file = file
self.filename = file.filename
self.timestamps_should_increase = timestamps_should_increase
self.problems = []

def validate(self):
Expand All @@ -41,7 +42,8 @@
self.problems = []
self.check_indices()
self.check_trainids()
self.check_timestamps()
if self.timestamps_should_increase:
self.check_timestamps()

return self.problems

Expand Down Expand Up @@ -231,20 +233,21 @@


def _check_file(args):
runpath, filename = args
runpath, filename, timestamps_should_increase = args
filepath = osp.join(runpath, filename)
fa, problems = _open_file(filepath)
if fa is not None:
fv = FileValidator(fa)
fv = FileValidator(fa, timestamps_should_increase=timestamps_should_increase)
problems.extend(fv.run_checks())
fa.close()
return filename, fa, problems


class RunValidator:
def __init__(self, run_dir: str, term_progress=False):
def __init__(self, run_dir: str, term_progress=False, timestamps_should_increase=False):
self.run_dir = run_dir
self.term_progress = term_progress
self.timestamps_should_increase = timestamps_should_increase
self.filenames = [f for f in os.listdir(run_dir) if f.endswith('.h5')]
self.file_accesses = []
self.problems = []
Expand Down Expand Up @@ -283,13 +286,15 @@
# prevent child processes from receiving KeyboardInterrupt
signal(SIGINT, SIG_IGN)

filepaths = [(self.run_dir, fn) for fn in sorted(self.filenames)]
check_args = [(self.run_dir, fn, self.timestamps_should_increase)
for fn in sorted(self.filenames)]
nfiles = len(self.filenames)
badfiles = []
self.progress(0, nfiles, 0, badfiles)

with Pool(initializer=initializer) as pool:
iterator = pool.imap_unordered(_check_file, filepaths)
iterator = pool.imap_unordered(
_check_file, check_args)
for done, (fname, fa, problems) in enumerate(iterator, start=1):
if problems:
self.problems.extend(problems)
Expand Down Expand Up @@ -331,13 +336,18 @@

ap = ArgumentParser(prog='extra-data-validate')
ap.add_argument('path', help="HDF5 file or run directory of HDF5 files.")
ap.add_argument('-ti', '--timestamps-increase',
help="Expect monotonically increasing timestamps.",
action="store_true")
args = ap.parse_args(argv)

path = args.path
timestamps_increase = args.timestamps_increase
if os.path.isdir(path):
print("Checking run directory:", path)
print()
validator = RunValidator(path, term_progress=True)
validator = RunValidator(path, term_progress=True,

Check warning on line 349 in extra_data/validation.py

View check run for this annotation

Codecov / codecov/patch

extra_data/validation.py#L349

Added line #L349 was not covered by tests
timestamps_should_increase=timestamps_increase)
else:
print("Checking file:", path)
fa, problems = _open_file(path)
Expand Down
Loading