-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #225 from openpreserve/dev/refactor-arg-parsing
Move CLI arg parsing out of fido.py into cli_args.py
- Loading branch information
Showing
10 changed files
with
194 additions
and
132 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import argparse | ||
from argparse import ArgumentParser, RawTextHelpFormatter | ||
from typing import Any, Dict, List | ||
|
||
|
||
def parse_cli_args(argv: List[str], defaults: Dict[str, Any]) -> argparse.Namespace: | ||
""" | ||
Parse command-line arguments. | ||
Args: | ||
argv (list[str]): List of command-line arguments. Could be sys.argv | ||
defaults (dict): Dictionary of default values. Expects to find configdir, bufsize and container_bufsize. | ||
Returns: | ||
argparse.Namespace: Parsed command-line arguments. Reference via name as in args.v or args.recurse. | ||
""" | ||
|
||
parser = ArgumentParser( | ||
description=defaults["description"], | ||
epilog=defaults["epilog"], | ||
fromfile_prefix_chars="@", | ||
formatter_class=RawTextHelpFormatter, | ||
) | ||
parser.add_argument("-v", default=False, action="store_true", help="show version information") | ||
parser.add_argument("-q", default=False, action="store_true", help="run (more) quietly") | ||
parser.add_argument("-recurse", default=False, action="store_true", help="recurse into subdirectories") | ||
parser.add_argument("-zip", default=False, action="store_true", help="recurse into zip and tar files") | ||
parser.add_argument( | ||
"-noextension", | ||
default=False, | ||
action="store_true", | ||
help="disable extension matching, reduces number of matches but may reduce false positives", | ||
) | ||
parser.add_argument( | ||
"-nocontainer", | ||
default=False, | ||
action="store_true", | ||
help="disable deep scan of container documents, increases speed but may reduce accuracy with big files", | ||
) | ||
parser.add_argument( | ||
"-pronom_only", | ||
default=False, | ||
action="store_true", | ||
help="disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results", | ||
) | ||
|
||
group = parser.add_mutually_exclusive_group() | ||
group.add_argument( | ||
"-input", default=False, help="file containing a list of files to check, one per line. - means stdin" | ||
) | ||
group.add_argument( | ||
"files", | ||
nargs="*", | ||
default=[], | ||
metavar="FILE", | ||
help="files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.", | ||
) | ||
|
||
parser.add_argument("-filename", default=None, help="filename if file contents passed through STDIN") | ||
parser.add_argument( | ||
"-useformats", | ||
metavar="INCLUDEPUIDS", | ||
default=None, | ||
help="comma separated string of formats to use in identification", | ||
) | ||
parser.add_argument( | ||
"-nouseformats", | ||
metavar="EXCLUDEPUIDS", | ||
default=None, | ||
help="comma separated string of formats not to use in identification", | ||
) | ||
parser.add_argument( | ||
"-matchprintf", | ||
metavar="FORMATSTRING", | ||
default=None, | ||
help="format string (Python style) to use on match. See nomatchprintf, README.txt.", | ||
) | ||
parser.add_argument( | ||
"-nomatchprintf", | ||
metavar="FORMATSTRING", | ||
default=None, | ||
help="format string (Python style) to use if no match. See README.txt", | ||
) | ||
parser.add_argument( | ||
"-bufsize", | ||
type=int, | ||
default=None, | ||
help=f"size (in bytes) of the buffer to match against (default={defaults['bufsize']})", | ||
) | ||
parser.add_argument( | ||
"-sigs", | ||
default=None, | ||
metavar="SIG_ACT", | ||
help='SIG_ACT "check" for new version\nSIG_ACT "update" to latest\nSIG_ACT "list" available versions\nSIG_ACT "n" use version n.', | ||
) | ||
parser.add_argument( | ||
"-container_bufsize", | ||
type=int, | ||
default=None, | ||
help=f"size (in bytes) of the buffer to match against (default={defaults['container_bufsize']}).", | ||
) | ||
parser.add_argument( | ||
"-loadformats", default=None, metavar="XML1,...,XMLn", help="comma separated string of XML format files to add." | ||
) | ||
parser.add_argument( | ||
"-confdir", | ||
default=defaults["config_dir"], | ||
help="configuration directory to load_fido_xml, for example, the format specifications from.", | ||
) | ||
|
||
return parser.parse_args(argv) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import pytest | ||
|
||
from fido.cli_args import parse_cli_args | ||
from fido.fido import defaults | ||
|
||
# Common argument string | ||
|
||
|
||
def test_parse_args_input_valid(): | ||
arg_string = ( | ||
"-v -q -recurse -zip -noextension -nocontainer -pronom_only " | ||
"-input files.txt " | ||
"-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" | ||
) | ||
args = parse_cli_args(arg_string.split(), defaults) | ||
assert args.v | ||
assert args.q | ||
assert args.recurse | ||
assert args.zip | ||
assert args.noextension | ||
assert args.nocontainer | ||
assert args.pronom_only | ||
assert args.input == "files.txt" | ||
assert args.useformats == "fmt1,fmt2" | ||
assert args.nouseformats == "fmt3,fmt4" | ||
|
||
|
||
def test_parse_args_files_valid(): | ||
arg_string = "-q -zip file1.ext file2.ext" | ||
args = parse_cli_args(arg_string.split(), defaults) | ||
assert args.q | ||
assert args.zip | ||
assert not args.noextension | ||
assert not args.nocontainer | ||
assert not args.pronom_only | ||
assert args.files == ["file1.ext", "file2.ext"] | ||
assert args.useformats is None | ||
assert args.nouseformats is None | ||
|
||
|
||
def test_parse_args_invalid(): | ||
arg_string = "-q -zip -bad_arg file1.ext file2.ext" | ||
with pytest.raises(SystemExit): | ||
parse_cli_args(arg_string.split(), defaults) | ||
|
||
|
||
def test_parse_files_and_input_invalid(): | ||
arg_string = "-q -zip -input files.txt file1.ext file2.ext" | ||
with pytest.raises(SystemExit): | ||
parse_cli_args(arg_string.split(), defaults) |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,15 @@ | ||
import os | ||
|
||
from fido.package import ZipPackage | ||
import pytest | ||
|
||
from fido.package import ZipPackage | ||
|
||
FIXTURES_DIR = os.path.normpath(os.path.join(__file__, '..', 'fixtures')) | ||
TEST_DATA_BAD_PACKAGES = os.path.normpath(os.path.join(__file__, "..", "test_data/hard_packages")) | ||
|
||
|
||
def test_bad_zips(): | ||
for filename in ('bad.zip', 'worse.zip', 'unicode.zip'): | ||
p = ZipPackage(os.path.join(FIXTURES_DIR, filename), {}) | ||
r = p.detect_formats() | ||
assert isinstance(r, list) and len(r) == 0 | ||
# None of these files should be identified as packages? | ||
@pytest.mark.parametrize("filename", ["bad.zip", "worse.zip", "unicode.zip", "foo.zip", "foo.tar"]) | ||
def test_bad_zip(filename): | ||
p = ZipPackage(os.path.join(TEST_DATA_BAD_PACKAGES, filename), {}) | ||
r = p.detect_formats() | ||
assert isinstance(r, list) and len(r) == 0 |