From 8c5ad5174bc52b4109dfaff464e34bfacb4e27fe Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Wed, 11 Sep 2024 18:11:21 +0100 Subject: [PATCH 1/7] Interim commit to help with rebasing after a PR was approved and merged into 1.8 --- fido/cli_args.py | 78 ++++++++++++++++++ pyproject.toml | 6 +- tests/test_cli_args.py | 61 ++++++++++++++ .../hard_packages}/bad.zip | Bin .../hard_packages}/foo.tar | Bin .../hard_packages}/foo.zip | Bin .../hard_packages}/unicode.zip | Bin .../hard_packages}/worse.zip | Bin tests/test_package.py | 16 ++-- 9 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 fido/cli_args.py create mode 100644 tests/test_cli_args.py rename tests/{fixtures => test_data/hard_packages}/bad.zip (100%) rename tests/{fixtures => test_data/hard_packages}/foo.tar (100%) rename tests/{fixtures => test_data/hard_packages}/foo.zip (100%) rename tests/{fixtures => test_data/hard_packages}/unicode.zip (100%) rename tests/{fixtures => test_data/hard_packages}/worse.zip (100%) diff --git a/fido/cli_args.py b/fido/cli_args.py new file mode 100644 index 00000000..260d7608 --- /dev/null +++ b/fido/cli_args.py @@ -0,0 +1,78 @@ +import argparse +import sys +from argparse import ArgumentParser, RawTextHelpFormatter + + +def build_parser() -> ArgumentParser: + defaults = { + "description": "FIDO - File Identification Tool", + "epilog": "For more information, visit the official documentation.", + } + + parser = ArgumentParser( + description=defaults["description"], + epilog=defaults["epilog"], + fromfile_prefix_chars="@", + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("-v", default=False, action="store_true", help="show version information") + parser.add_argument("-q", default=False, action="store_true", help="run (more) quietly") + parser.add_argument("-recurse", default=False, action="store_true", help="recurse into subdirectories") + parser.add_argument("-zip", default=False, action="store_true", help="recurse into zip and tar 
files") + parser.add_argument( + "-noextension", + default=False, + action="store_true", + help="disable extension matching, reduces number of matches but may reduce false positives", + ) + parser.add_argument( + "-nocontainer", + default=False, + action="store_true", + help="disable deep scan of container documents, increases speed but may reduce accuracy with big files", + ) + parser.add_argument( + "-pronom_only", + default=False, + action="store_true", + help="disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results", + ) + + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-input", default=False, help="file containing a list of files to check, one per line. - means stdin" + ) + group.add_argument( + "files", + nargs="*", + default=[], + metavar="FILE", + help="files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.", + ) + + parser.add_argument("-filename", default=None, help="filename if file contents passed through STDIN") + parser.add_argument( + "-useformats", + metavar="INCLUDEPUIDS", + default=None, + help="comma separated string of formats to use in identification", + ) + parser.add_argument( + "-nouseformats", + metavar="EXCLUDEPUIDS", + default=None, + help="comma separated string of formats not to use in identification", + ) + + return parser + + +def parse_args(parser: ArgumentParser) -> argparse.Namespace: + try: + args = parser.parse_args() + except argparse.ArgumentError as e: + parser.print_help() + print(f"\nError: {e}\n") + sys.exit(1) + + return args diff --git a/pyproject.toml b/pyproject.toml index dcc69997..9aa09967 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,7 @@ [build-system] -requires = ["setuptools>=42", "wheel", "setuptools-git-versioning>=2.0,<3"] +requires = ["setuptools>=42", "wheel", "twine", "setuptools-git-versioning>=2.0,<3"] build-backend = 
"setuptools.build_meta" -# These were in requirements/packing.txt -# twine>=1.8,<1.9 -# wheel==0.38.1 - [project] name = "opf-fido" dynamic = ["version"] diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py new file mode 100644 index 00000000..4405a86f --- /dev/null +++ b/tests/test_cli_args.py @@ -0,0 +1,61 @@ +import argparse + +import pytest + +from fido.cli_args import build_parser, parse_args + +# Common argument string +ARG_STRING = ( + "-v -q -recurse -zip -noextension -nocontainer -pronom_only" + "-input files.txt" + "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" +) + +ARG_STRING = ( + "-v -q -recurse -zip -noextension -nocontainer -pronom_only" + "-input files.txt" + "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" +) + + +def test_build_parser(): + parser = build_parser() + assert isinstance(parser, argparse.ArgumentParser) + + # Check if all expected arguments are present + expected_args = ARG_STRING.split() + for arg in expected_args: + assert arg in parser._option_string_actions + + +def test_parse_args_valid(): + parser = build_parser() + + args = parse_args(parser.parse_args(ARG_STRING.split())) + + assert args.v is True + assert args.q is True + assert args.recurse is True + assert args.zip is True + assert args.noextension is True + assert args.nocontainer is True + assert args.pronom_only is True + assert args.input == "input_file" + assert args.files == ["file1", "file2"] + assert args.filename == "filename" + assert args.useformats == "fmt1,fmt2" + assert args.nouseformats == "fmt3,fmt4" + + +def test_parse_args_invalid(monkeypatch): + parser = build_parser() + + # Simulate invalid argument input + monkeypatch.setattr("sys.argv", ["prog", "--invalid"]) + with pytest.raises(SystemExit): + parse_args(parser) + + # Simulate missing required argument + monkeypatch.setattr("sys.argv", ["prog", "-input"]) + with pytest.raises(SystemExit): + parse_args(parser) diff --git a/tests/fixtures/bad.zip b/tests/test_data/hard_packages/bad.zip 
similarity index 100% rename from tests/fixtures/bad.zip rename to tests/test_data/hard_packages/bad.zip diff --git a/tests/fixtures/foo.tar b/tests/test_data/hard_packages/foo.tar similarity index 100% rename from tests/fixtures/foo.tar rename to tests/test_data/hard_packages/foo.tar diff --git a/tests/fixtures/foo.zip b/tests/test_data/hard_packages/foo.zip similarity index 100% rename from tests/fixtures/foo.zip rename to tests/test_data/hard_packages/foo.zip diff --git a/tests/fixtures/unicode.zip b/tests/test_data/hard_packages/unicode.zip similarity index 100% rename from tests/fixtures/unicode.zip rename to tests/test_data/hard_packages/unicode.zip diff --git a/tests/fixtures/worse.zip b/tests/test_data/hard_packages/worse.zip similarity index 100% rename from tests/fixtures/worse.zip rename to tests/test_data/hard_packages/worse.zip diff --git a/tests/test_package.py b/tests/test_package.py index f1565142..b4123cd2 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -1,13 +1,15 @@ import os -from fido.package import ZipPackage +import pytest +from fido.package import ZipPackage -FIXTURES_DIR = os.path.normpath(os.path.join(__file__, '..', 'fixtures')) +TEST_DATA_BAD_PACKAGES = os.path.normpath(os.path.join(__file__, "..", "test_data/hard_packages")) -def test_bad_zips(): - for filename in ('bad.zip', 'worse.zip', 'unicode.zip'): - p = ZipPackage(os.path.join(FIXTURES_DIR, filename), {}) - r = p.detect_formats() - assert isinstance(r, list) and len(r) == 0 +# None of these files should be identified as packages? 
+@pytest.mark.parametrize("filename", ["bad.zip", "worse.zip", "unicode.zip", "foo.zip", "foo.tar"]) +def test_bad_zip(filename): + p = ZipPackage(os.path.join(TEST_DATA_BAD_PACKAGES, filename), {}) + r = p.detect_formats() + assert isinstance(r, list) and len(r) == 0 From 0d5f11c44a6d1994db0db9497ad093b61d542495 Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Wed, 11 Sep 2024 18:11:21 +0100 Subject: [PATCH 2/7] Interim commit to help with rebasing after a PR was approved and merged into 1.8 --- fido/cli_args.py | 78 ++++++++++++++++++ pyproject.toml | 6 +- tests/test_cli_args.py | 61 ++++++++++++++ .../hard_packages}/bad.zip | Bin .../hard_packages}/foo.tar | Bin .../hard_packages}/foo.zip | Bin .../hard_packages}/unicode.zip | Bin .../hard_packages}/worse.zip | Bin tests/test_package.py | 16 ++-- 9 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 fido/cli_args.py create mode 100644 tests/test_cli_args.py rename tests/{fixtures => test_data/hard_packages}/bad.zip (100%) rename tests/{fixtures => test_data/hard_packages}/foo.tar (100%) rename tests/{fixtures => test_data/hard_packages}/foo.zip (100%) rename tests/{fixtures => test_data/hard_packages}/unicode.zip (100%) rename tests/{fixtures => test_data/hard_packages}/worse.zip (100%) diff --git a/fido/cli_args.py b/fido/cli_args.py new file mode 100644 index 00000000..260d7608 --- /dev/null +++ b/fido/cli_args.py @@ -0,0 +1,78 @@ +import argparse +import sys +from argparse import ArgumentParser, RawTextHelpFormatter + + +def build_parser() -> ArgumentParser: + defaults = { + "description": "FIDO - File Identification Tool", + "epilog": "For more information, visit the official documentation.", + } + + parser = ArgumentParser( + description=defaults["description"], + epilog=defaults["epilog"], + fromfile_prefix_chars="@", + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("-v", default=False, action="store_true", help="show version information") + parser.add_argument("-q", 
default=False, action="store_true", help="run (more) quietly") + parser.add_argument("-recurse", default=False, action="store_true", help="recurse into subdirectories") + parser.add_argument("-zip", default=False, action="store_true", help="recurse into zip and tar files") + parser.add_argument( + "-noextension", + default=False, + action="store_true", + help="disable extension matching, reduces number of matches but may reduce false positives", + ) + parser.add_argument( + "-nocontainer", + default=False, + action="store_true", + help="disable deep scan of container documents, increases speed but may reduce accuracy with big files", + ) + parser.add_argument( + "-pronom_only", + default=False, + action="store_true", + help="disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results", + ) + + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-input", default=False, help="file containing a list of files to check, one per line. - means stdin" + ) + group.add_argument( + "files", + nargs="*", + default=[], + metavar="FILE", + help="files to check. If the file is -, then read content from stdin. 
In this case, python must be invoked with -u or it may convert the line terminators.", + ) + + parser.add_argument("-filename", default=None, help="filename if file contents passed through STDIN") + parser.add_argument( + "-useformats", + metavar="INCLUDEPUIDS", + default=None, + help="comma separated string of formats to use in identification", + ) + parser.add_argument( + "-nouseformats", + metavar="EXCLUDEPUIDS", + default=None, + help="comma separated string of formats not to use in identification", + ) + + return parser + + +def parse_args(parser: ArgumentParser) -> argparse.Namespace: + try: + args = parser.parse_args() + except argparse.ArgumentError as e: + parser.print_help() + print(f"\nError: {e}\n") + sys.exit(1) + + return args diff --git a/pyproject.toml b/pyproject.toml index dcc69997..9aa09967 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,7 @@ [build-system] -requires = ["setuptools>=42", "wheel", "setuptools-git-versioning>=2.0,<3"] +requires = ["setuptools>=42", "wheel", "twine", "setuptools-git-versioning>=2.0,<3"] build-backend = "setuptools.build_meta" -# These were in requirements/packing.txt -# twine>=1.8,<1.9 -# wheel==0.38.1 - [project] name = "opf-fido" dynamic = ["version"] diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py new file mode 100644 index 00000000..4405a86f --- /dev/null +++ b/tests/test_cli_args.py @@ -0,0 +1,61 @@ +import argparse + +import pytest + +from fido.cli_args import build_parser, parse_args + +# Common argument string +ARG_STRING = ( + "-v -q -recurse -zip -noextension -nocontainer -pronom_only" + "-input files.txt" + "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" +) + +ARG_STRING = ( + "-v -q -recurse -zip -noextension -nocontainer -pronom_only" + "-input files.txt" + "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" +) + + +def test_build_parser(): + parser = build_parser() + assert isinstance(parser, argparse.ArgumentParser) + + # Check if all expected arguments are present + 
expected_args = ARG_STRING.split() + for arg in expected_args: + assert arg in parser._option_string_actions + + +def test_parse_args_valid(): + parser = build_parser() + + args = parse_args(parser.parse_args(ARG_STRING.split())) + + assert args.v is True + assert args.q is True + assert args.recurse is True + assert args.zip is True + assert args.noextension is True + assert args.nocontainer is True + assert args.pronom_only is True + assert args.input == "input_file" + assert args.files == ["file1", "file2"] + assert args.filename == "filename" + assert args.useformats == "fmt1,fmt2" + assert args.nouseformats == "fmt3,fmt4" + + +def test_parse_args_invalid(monkeypatch): + parser = build_parser() + + # Simulate invalid argument input + monkeypatch.setattr("sys.argv", ["prog", "--invalid"]) + with pytest.raises(SystemExit): + parse_args(parser) + + # Simulate missing required argument + monkeypatch.setattr("sys.argv", ["prog", "-input"]) + with pytest.raises(SystemExit): + parse_args(parser) diff --git a/tests/fixtures/bad.zip b/tests/test_data/hard_packages/bad.zip similarity index 100% rename from tests/fixtures/bad.zip rename to tests/test_data/hard_packages/bad.zip diff --git a/tests/fixtures/foo.tar b/tests/test_data/hard_packages/foo.tar similarity index 100% rename from tests/fixtures/foo.tar rename to tests/test_data/hard_packages/foo.tar diff --git a/tests/fixtures/foo.zip b/tests/test_data/hard_packages/foo.zip similarity index 100% rename from tests/fixtures/foo.zip rename to tests/test_data/hard_packages/foo.zip diff --git a/tests/fixtures/unicode.zip b/tests/test_data/hard_packages/unicode.zip similarity index 100% rename from tests/fixtures/unicode.zip rename to tests/test_data/hard_packages/unicode.zip diff --git a/tests/fixtures/worse.zip b/tests/test_data/hard_packages/worse.zip similarity index 100% rename from tests/fixtures/worse.zip rename to tests/test_data/hard_packages/worse.zip diff --git a/tests/test_package.py b/tests/test_package.py 
index f1565142..b4123cd2 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -1,13 +1,15 @@ import os -from fido.package import ZipPackage +import pytest +from fido.package import ZipPackage -FIXTURES_DIR = os.path.normpath(os.path.join(__file__, '..', 'fixtures')) +TEST_DATA_BAD_PACKAGES = os.path.normpath(os.path.join(__file__, "..", "test_data/hard_packages")) -def test_bad_zips(): - for filename in ('bad.zip', 'worse.zip', 'unicode.zip'): - p = ZipPackage(os.path.join(FIXTURES_DIR, filename), {}) - r = p.detect_formats() - assert isinstance(r, list) and len(r) == 0 +# None of these files should be identified as packages? +@pytest.mark.parametrize("filename", ["bad.zip", "worse.zip", "unicode.zip", "foo.zip", "foo.tar"]) +def test_bad_zip(filename): + p = ZipPackage(os.path.join(TEST_DATA_BAD_PACKAGES, filename), {}) + r = p.detect_formats() + assert isinstance(r, list) and len(r) == 0 From 630be3a5b7bf784c087340e6e7a42b6af6d79f66 Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Thu, 12 Sep 2024 17:37:56 +0100 Subject: [PATCH 3/7] Refactor `fido.py` moving CLI argument parsing into its own `cli_args.py`. Add some tests to the argument parsing. Add the CONFIG_DIR to the dictionary of defaults in fido.py. --- fido/cli_args.py | 65 ++++++++++++++------ fido/fido.py | 131 ++++------------------------------------- tests/test_cli_args.py | 91 +++++++++++++--------------- 3 files changed, 103 insertions(+), 184 deletions(-) diff --git a/fido/cli_args.py b/fido/cli_args.py index 260d7608..a6919853 100644 --- a/fido/cli_args.py +++ b/fido/cli_args.py @@ -3,11 +3,15 @@ from argparse import ArgumentParser, RawTextHelpFormatter -def build_parser() -> ArgumentParser: - defaults = { - "description": "FIDO - File Identification Tool", - "epilog": "For more information, visit the official documentation.", - } +def parse_cli_args(argv: list[str], defaults: dict) -> argparse.Namespace: + """ + Parse command-line arguments. 
+ Args: + argv (list[str]): List of command-line arguments. Could be sys.argv + defaults (dict): Dictionary of default values. Expects to find description, epilog, config_dir, bufsize and container_bufsize. + Returns: + argparse.Namespace: Parsed command-line arguments. Reference via name as in args.v or args.recurse. + """ parser = ArgumentParser( description=defaults["description"], @@ -63,16 +67,43 @@ def build_parser() -> ArgumentParser: default=None, help="comma separated string of formats not to use in identification", ) + parser.add_argument( + "-matchprintf", + metavar="FORMATSTRING", + default=None, + help="format string (Python style) to use on match. See nomatchprintf, README.txt.", + ) + parser.add_argument( + "-nomatchprintf", + metavar="FORMATSTRING", + default=None, + help="format string (Python style) to use if no match. See README.txt", + ) + parser.add_argument( + "-bufsize", + type=int, + default=None, + help=f"size (in bytes) of the buffer to match against (default={defaults['bufsize']})", + ) + parser.add_argument( + "-sigs", + default=None, + metavar="SIG_ACT", + help='SIG_ACT "check" for new version\nSIG_ACT "update" to latest\nSIG_ACT "list" available versions\nSIG_ACT "n" use version n.', + ) + parser.add_argument( + "-container_bufsize", + type=int, + default=None, + help=f"size (in bytes) of the buffer to match against (default={defaults['container_bufsize']}).", + ) + parser.add_argument( + "-loadformats", default=None, metavar="XML1,...,XMLn", help="comma separated string of XML format files to add."
+ ) + parser.add_argument( + "-confdir", + default=defaults["config_dir"], + help="configuration directory to load_fido_xml, for example, the format specifications from.", + ) - return parser - - -def parse_args(parser: ArgumentParser) -> argparse.Namespace: - try: - args = parser.parse_args() - except argparse.ArgumentError as e: - parser.print_help() - print(f"\nError: {e}\n") - sys.exit(1) - - return args + return parser.parse_args(argv) diff --git a/fido/fido.py b/fido/fido.py index cb2c1ef1..dcf333c2 100755 --- a/fido/fido.py +++ b/fido/fido.py @@ -13,23 +13,20 @@ import sys import tarfile import tempfile -from argparse import ArgumentParser, RawTextHelpFormatter -from contextlib import closing - -try: - from time import perf_counter -except ImportError: - from time import clock as perf_counter - import zipfile +from contextlib import closing +from time import perf_counter +from typing import Optional from xml.etree import cElementTree as ET from fido import CONFIG_DIR, __version__ from fido.char_handler import escape +from fido.cli_args import parse_cli_args from fido.package import OlePackage, ZipPackage from fido.versions import get_local_versions, sig_file_actions defaults = { + "config_dir": CONFIG_DIR, "bufsize": 128 * 1024, # (bytes) "regexcachesize": 2084, # (bytes) "printmatch": 'OK,%(info.time)s,%(info.puid)s,"%(info.formatname)s","%(info.signaturename)s",%(info.filesize)s,"%(info.filename)s","%(info.mimetype)s","%(info.matchtype)s"\n', @@ -74,8 +71,8 @@ class Fido: def __init__( self, - quiet=False, - bufsize=None, + quiet: bool = False, + bufsize: Optional[int] = None, container_bufsize=None, printnomatch=None, printmatch=None, @@ -793,107 +790,7 @@ def main(args=None): """Main FIDO method.""" if not args: args = sys.argv[1:] - - parser = ArgumentParser( - description=defaults["description"], - epilog=defaults["epilog"], - fromfile_prefix_chars="@", - formatter_class=RawTextHelpFormatter, - ) - parser.add_argument("-v", default=False, 
action="store_true", help="show version information") - parser.add_argument("-q", default=False, action="store_true", help="run (more) quietly") - parser.add_argument("-recurse", default=False, action="store_true", help="recurse into subdirectories") - parser.add_argument("-zip", default=False, action="store_true", help="recurse into zip and tar files") - parser.add_argument( - "-noextension", - default=False, - action="store_true", - help="disable extension matching, reduces number of matches but may reduce false positives", - ) - parser.add_argument( - "-nocontainer", - default=False, - action="store_true", - help="disable deep scan of container documents, increases speed but may reduce accuracy with big files", - ) - parser.add_argument( - "-pronom_only", - default=False, - action="store_true", - help="disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results", - ) - - group = parser.add_mutually_exclusive_group() - group.add_argument( - "-input", default=False, help="file containing a list of files to check, one per line. - means stdin" - ) - group.add_argument( - "files", - nargs="*", - default=[], - metavar="FILE", - help="files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.", - ) - - parser.add_argument("-filename", default=None, help="filename if file contents passed through STDIN") - parser.add_argument( - "-useformats", - metavar="INCLUDEPUIDS", - default=None, - help="comma separated string of formats to use in identification", - ) - parser.add_argument( - "-nouseformats", - metavar="EXCLUDEPUIDS", - default=None, - help="comma separated string of formats not to use in identification", - ) - parser.add_argument( - "-matchprintf", - metavar="FORMATSTRING", - default=None, - help="format string (Python style) to use on match. 
See nomatchprintf, README.txt.", - ) - parser.add_argument( - "-nomatchprintf", - metavar="FORMATSTRING", - default=None, - help="format string (Python style) to use if no match. See README.txt", - ) - parser.add_argument( - "-bufsize", - type=int, - default=None, - help="size (in bytes) of the buffer to match against (default=" + str(defaults["bufsize"]) + " bytes)", - ) - parser.add_argument( - "-sigs", - default=None, - metavar="SIG_ACT", - help='SIG_ACT "check" for new version\nSIG_ACT "update" to latest\nSIG_ACT "list" available versions\nSIG_ACT "n" use version n.', - ) - parser.add_argument( - "-container_bufsize", - type=int, - default=None, - help="size (in bytes) of the buffer to match against (default=" - + str(defaults["container_bufsize"]) - + " bytes)", - ) - parser.add_argument( - "-loadformats", default=None, metavar="XML1,...,XMLn", help="comma separated string of XML format files to add." - ) - parser.add_argument( - "-confdir", - default=CONFIG_DIR, - help="configuration directory to load_fido_xml, for example, the format specifications from.", - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args(args) - + args = parse_cli_args(args, defaults) timer = PerfTimer() versions = get_local_versions(args.confdir) @@ -904,15 +801,13 @@ def main(args=None): defaults["format_files"] = [defaults["xml_pronomSignature"]] if args.pronom_only: - versionHeader = "FIDO v{0} ({1}, {2})\n".format( - __version__, defaults["xml_pronomSignature"], defaults["containersignature_file"] + versionHeader = ( + f"FIDO v{__version__} ({defaults['xml_pronomSignature']}, {defaults['containersignature_file']})\n" ) else: - versionHeader = "FIDO v{0} ({1}, {2}, {3})\n".format( - __version__, - defaults["xml_pronomSignature"], - defaults["containersignature_file"], - defaults["xml_fidoExtensionSignature"], + versionHeader = ( + f"FIDO v{__version__} ({defaults['xml_pronomSignature']}, {defaults['containersignature_file']}, " + 
f"{defaults['xml_fidoExtensionSignature']})\n" ) defaults["format_files"].append(defaults["xml_fidoExtensionSignature"]) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 4405a86f..a846bad0 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -1,61 +1,54 @@ -import argparse - import pytest -from fido.cli_args import build_parser, parse_args +from fido.cli_args import parse_cli_args +from fido.fido import defaults # Common argument string -ARG_STRING = ( - "-v -q -recurse -zip -noextension -nocontainer -pronom_only" - "-input files.txt" - "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" -) - -ARG_STRING = ( - "-v -q -recurse -zip -noextension -nocontainer -pronom_only" - "-input files.txt" - "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" -) - - -def test_build_parser(): - parser = build_parser() - assert isinstance(parser, argparse.ArgumentParser) - - # Check if all expected arguments are present - expected_args = ARG_STRING.split() - for arg in expected_args: - assert arg in parser._option_string_actions - - -def test_parse_args_valid(): - parser = build_parser() - - args = parse_args(parser.parse_args(ARG_STRING.split())) - - assert args.v is True - assert args.q is True - assert args.recurse is True - assert args.zip is True - assert args.noextension is True - assert args.nocontainer is True - assert args.pronom_only is True - assert args.input == "input_file" - assert args.files == ["file1", "file2"] - assert args.filename == "filename" + + +def test_parse_args_input_valid(): + arg_string = ( + "-v -q -recurse -zip -noextension -nocontainer -pronom_only " + "-input files.txt " + "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" + ) + args = parse_cli_args(arg_string.split(), defaults) + print(arg_string.split()) + print(args) + assert args.v + assert args.q + assert args.recurse + assert args.zip + assert args.noextension + assert args.nocontainer + assert args.pronom_only + assert args.input == "files.txt" assert args.useformats == 
"fmt1,fmt2" assert args.nouseformats == "fmt3,fmt4" -def test_parse_args_invalid(monkeypatch): - parser = build_parser() +def test_parse_args_files_valid(): + arg_string = "-q -zip file1.ext file2.ext" + args = parse_cli_args(arg_string.split(), defaults) + print(arg_string.split()) + print(args) + assert args.q + assert args.zip + assert args.noextension == False + assert args.nocontainer == False + assert args.pronom_only == False + assert args.files == ["file1.ext", "file2.ext"] + assert args.useformats is None + assert args.nouseformats is None - # Simulate invalid argument input - monkeypatch.setattr("sys.argv", ["prog", "--invalid"]) + +def test_parse_args_invalid(): + arg_string = "-q -zip -bad_arg file1.ext file2.ext" with pytest.raises(SystemExit): - parse_args(parser) + args = parse_cli_args(arg_string.split(), defaults) + - # Simulate missing required argument - monkeypatch.setattr("sys.argv", ["prog", "-input"]) +def test_parse_files_and_input_invalid(): + arg_string = "-q -zip -input files.txt file1.ext file2.ext" with pytest.raises(SystemExit): - parse_args(parser) + args = parse_cli_args(arg_string.split(), defaults) From eb1d7006580447cf6e42bc5285b0cd39cc099095 Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Thu, 12 Sep 2024 17:46:23 +0100 Subject: [PATCH 4/7] Removed unused import. --- fido/cli_args.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fido/cli_args.py b/fido/cli_args.py index a6919853..ac28fa36 100644 --- a/fido/cli_args.py +++ b/fido/cli_args.py @@ -1,5 +1,4 @@ import argparse -import sys from argparse import ArgumentParser, RawTextHelpFormatter From 30d2d0dd0ea182511fcf7481f26ef95bb4933f8e Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Thu, 12 Sep 2024 17:51:18 +0100 Subject: [PATCH 5/7] Minor edits to make flake8 happier. 
--- fido/fido.py | 14 ++++++++++++-- tests/test_cli_args.py | 10 +++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fido/fido.py b/fido/fido.py index dcf333c2..e298b934 100755 --- a/fido/fido.py +++ b/fido/fido.py @@ -634,7 +634,12 @@ def walk_zip(self, filename, fileobj=None, extension=True): with zipstream.open(item) as source: self.copy_stream(source, target) # target.seek(0) - self.identify_contents(item_name, target, self.container_type(matches), extension=extension) + self.identify_contents( + item_name, + target, + self.container_type(matches), + extension=extension, + ) except IOError: sys.stderr.write("FIDO: ZipError {0}\n".format(filename)) except zipfile.BadZipfile: @@ -663,7 +668,12 @@ def walk_tar(self, filename, fileobj, extension=True): self.handle_matches(tar_item_name, matches, timer.duration()) if self.container_type(matches): f.seek(0) - self.identify_contents(tar_item_name, f, self.container_type(matches), extension=extension) + self.identify_contents( + tar_item_name, + f, + self.container_type(matches), + extension=extension, + ) except tarfile.TarError: sys.stderr.write("FIDO: Error: TarError {0}\n".format(filename)) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index a846bad0..0169a52b 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -34,9 +34,9 @@ def test_parse_args_files_valid(): print(args) assert args.q assert args.zip - assert args.noextension == False - assert args.nocontainer == False - assert args.pronom_only == False + assert args.noextension + assert args.nocontainer + assert args.pronom_only assert args.files == ["file1.ext", "file2.ext"] assert args.useformats is None assert args.nouseformats is None @@ -45,10 +45,10 @@ def test_parse_args_files_valid(): def test_parse_args_invalid(): arg_string = "-q -zip -bad_arg file1.ext file2.ext" with pytest.raises(SystemExit): - args = parse_cli_args(arg_string.split(), defaults) + parse_cli_args(arg_string.split(), defaults) def 
test_parse_files_and_input_invalid(): arg_string = "-q -zip -input files.txt file1.ext file2.ext" with pytest.raises(SystemExit): - args = parse_cli_args(arg_string.split(), defaults) + parse_cli_args(arg_string.split(), defaults) From bbbffbe408cf0cb48ab32ddc5cf5d88ae23b7fc9 Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Thu, 12 Sep 2024 17:54:27 +0100 Subject: [PATCH 6/7] More minor edits to make flake8 happy. --- fido/cli_args.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fido/cli_args.py b/fido/cli_args.py index ac28fa36..fc74bfe9 100644 --- a/fido/cli_args.py +++ b/fido/cli_args.py @@ -1,8 +1,9 @@ import argparse from argparse import ArgumentParser, RawTextHelpFormatter +from typing import Any, Dict, List -def parse_cli_args(argv: list[str], defaults: dict) -> argparse.Namespace: +def parse_cli_args(argv: List[str], defaults: Dict[str, Any]) -> argparse.Namespace: """ Parse command-line arguments. Args: From 529b4839b08644fe3a58b21d322c61da40679a82 Mon Sep 17 00:00:00 2001 From: Adam Farquhar Date: Thu, 12 Sep 2024 17:58:38 +0100 Subject: [PATCH 7/7] Minor edits to make pytest happy. 
--- tests/test_cli_args.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 0169a52b..3be8ae17 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -13,8 +13,6 @@ def test_parse_args_input_valid(): "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4" ) args = parse_cli_args(arg_string.split(), defaults) - print(arg_string.split()) - print(args) assert args.v assert args.q assert args.recurse @@ -30,13 +28,11 @@ def test_parse_args_input_valid(): def test_parse_args_files_valid(): arg_string = "-q -zip file1.ext file2.ext" args = parse_cli_args(arg_string.split(), defaults) - print(arg_string.split()) - print(args) assert args.q assert args.zip - assert args.noextension - assert args.nocontainer - assert args.pronom_only + assert not args.noextension + assert not args.nocontainer + assert not args.pronom_only assert args.files == ["file1.ext", "file2.ext"] assert args.useformats is None assert args.nouseformats is None