Skip to content

Commit

Permalink
Refactor fido.py, moving CLI argument parsing into its own `cli_args.py`. Add some tests for the argument parsing.
Browse files Browse the repository at this point in the history

Add the CONFIG_DIR to the dictionary of defaults in fido.py.
  • Loading branch information
adamfarquhar committed Sep 12, 2024
1 parent aaa38b0 commit 630be3a
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 184 deletions.
65 changes: 48 additions & 17 deletions fido/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
from argparse import ArgumentParser, RawTextHelpFormatter


def build_parser() -> ArgumentParser:
defaults = {
"description": "FIDO - File Identification Tool",
"epilog": "For more information, visit the official documentation.",
}
def parse_cli_args(argv: list[str], defaults: dict) -> argparse.Namespace:
"""
Parse command-line arguments.
Args:
argv (list[str]): List of command-line arguments. Could be sys.argv
defaults (dict): Dictionary of default values. Expects to find at least description, config_dir, bufsize and container_bufsize.
Returns:
argparse.Namespace: Parsed command-line arguments. Reference via name as in args.v or args.recurse.
"""

parser = ArgumentParser(
description=defaults["description"],
Expand Down Expand Up @@ -63,16 +67,43 @@ def build_parser() -> ArgumentParser:
default=None,
help="comma separated string of formats not to use in identification",
)
parser.add_argument(
"-matchprintf",
metavar="FORMATSTRING",
default=None,
help="format string (Python style) to use on match. See nomatchprintf, README.txt.",
)
parser.add_argument(
"-nomatchprintf",
metavar="FORMATSTRING",
default=None,
help="format string (Python style) to use if no match. See README.txt",
)
parser.add_argument(
"-bufsize",
type=int,
default=None,
help=f"size (in bytes) of the buffer to match against (default={defaults['bufsize']})",
)
parser.add_argument(
"-sigs",
default=None,
metavar="SIG_ACT",
help='SIG_ACT "check" for new version\nSIG_ACT "update" to latest\nSIG_ACT "list" available versions\nSIG_ACT "n" use version n.',
)
parser.add_argument(
"-container_bufsize",
type=int,
default=None,
help=f"size (in bytes) of the buffer to match against (default={defaults['container_bufsize']}).",
)
parser.add_argument(
"-loadformats", default=None, metavar="XML1,...,XMLn", help="comma separated string of XML format files to add."
)
parser.add_argument(
"-confdir",
default=defaults["config_dir"],
help="configuration directory to load_fido_xml, for example, the format specifications from.",
)

return parser


def parse_args(parser: ArgumentParser) -> argparse.Namespace:
try:
args = parser.parse_args()
except argparse.ArgumentError as e:
parser.print_help()
print(f"\nError: {e}\n")
sys.exit(1)

return args
return parser.parse_args(argv)
131 changes: 13 additions & 118 deletions fido/fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,20 @@
import sys
import tarfile
import tempfile
from argparse import ArgumentParser, RawTextHelpFormatter
from contextlib import closing

try:
from time import perf_counter
except ImportError:
from time import clock as perf_counter

import zipfile
from contextlib import closing
from time import perf_counter
from typing import Optional
from xml.etree import cElementTree as ET

from fido import CONFIG_DIR, __version__
from fido.char_handler import escape
from fido.cli_args import parse_cli_args
from fido.package import OlePackage, ZipPackage
from fido.versions import get_local_versions, sig_file_actions

defaults = {
"config_dir": CONFIG_DIR,
"bufsize": 128 * 1024, # (bytes)
"regexcachesize": 2084, # (bytes)
"printmatch": 'OK,%(info.time)s,%(info.puid)s,"%(info.formatname)s","%(info.signaturename)s",%(info.filesize)s,"%(info.filename)s","%(info.mimetype)s","%(info.matchtype)s"\n',
Expand Down Expand Up @@ -74,8 +71,8 @@ class Fido:

def __init__(
self,
quiet=False,
bufsize=None,
quiet: bool = False,
bufsize: Optional[int] = None,
container_bufsize=None,
printnomatch=None,
printmatch=None,
Expand Down Expand Up @@ -793,107 +790,7 @@ def main(args=None):
"""Main FIDO method."""
if not args:
args = sys.argv[1:]

parser = ArgumentParser(
description=defaults["description"],
epilog=defaults["epilog"],
fromfile_prefix_chars="@",
formatter_class=RawTextHelpFormatter,
)
parser.add_argument("-v", default=False, action="store_true", help="show version information")
parser.add_argument("-q", default=False, action="store_true", help="run (more) quietly")
parser.add_argument("-recurse", default=False, action="store_true", help="recurse into subdirectories")
parser.add_argument("-zip", default=False, action="store_true", help="recurse into zip and tar files")
parser.add_argument(
"-noextension",
default=False,
action="store_true",
help="disable extension matching, reduces number of matches but may reduce false positives",
)
parser.add_argument(
"-nocontainer",
default=False,
action="store_true",
help="disable deep scan of container documents, increases speed but may reduce accuracy with big files",
)
parser.add_argument(
"-pronom_only",
default=False,
action="store_true",
help="disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results",
)

group = parser.add_mutually_exclusive_group()
group.add_argument(
"-input", default=False, help="file containing a list of files to check, one per line. - means stdin"
)
group.add_argument(
"files",
nargs="*",
default=[],
metavar="FILE",
help="files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.",
)

parser.add_argument("-filename", default=None, help="filename if file contents passed through STDIN")
parser.add_argument(
"-useformats",
metavar="INCLUDEPUIDS",
default=None,
help="comma separated string of formats to use in identification",
)
parser.add_argument(
"-nouseformats",
metavar="EXCLUDEPUIDS",
default=None,
help="comma separated string of formats not to use in identification",
)
parser.add_argument(
"-matchprintf",
metavar="FORMATSTRING",
default=None,
help="format string (Python style) to use on match. See nomatchprintf, README.txt.",
)
parser.add_argument(
"-nomatchprintf",
metavar="FORMATSTRING",
default=None,
help="format string (Python style) to use if no match. See README.txt",
)
parser.add_argument(
"-bufsize",
type=int,
default=None,
help="size (in bytes) of the buffer to match against (default=" + str(defaults["bufsize"]) + " bytes)",
)
parser.add_argument(
"-sigs",
default=None,
metavar="SIG_ACT",
help='SIG_ACT "check" for new version\nSIG_ACT "update" to latest\nSIG_ACT "list" available versions\nSIG_ACT "n" use version n.',
)
parser.add_argument(
"-container_bufsize",
type=int,
default=None,
help="size (in bytes) of the buffer to match against (default="
+ str(defaults["container_bufsize"])
+ " bytes)",
)
parser.add_argument(
"-loadformats", default=None, metavar="XML1,...,XMLn", help="comma separated string of XML format files to add."
)
parser.add_argument(
"-confdir",
default=CONFIG_DIR,
help="configuration directory to load_fido_xml, for example, the format specifications from.",
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args(args)

args = parse_cli_args(args, defaults)
timer = PerfTimer()

versions = get_local_versions(args.confdir)
Expand All @@ -904,15 +801,13 @@ def main(args=None):
defaults["format_files"] = [defaults["xml_pronomSignature"]]

if args.pronom_only:
versionHeader = "FIDO v{0} ({1}, {2})\n".format(
__version__, defaults["xml_pronomSignature"], defaults["containersignature_file"]
versionHeader = (
f"FIDO v{__version__} ({defaults['xml_pronomSignature']}, {defaults['containersignature_file']})\n"
)
else:
versionHeader = "FIDO v{0} ({1}, {2}, {3})\n".format(
__version__,
defaults["xml_pronomSignature"],
defaults["containersignature_file"],
defaults["xml_fidoExtensionSignature"],
versionHeader = (
f"FIDO v{__version__} ({defaults['xml_pronomSignature']}, {defaults['containersignature_file']}, "
f"{defaults['xml_fidoExtensionSignature']})\n"
)
defaults["format_files"].append(defaults["xml_fidoExtensionSignature"])

Expand Down
91 changes: 42 additions & 49 deletions tests/test_cli_args.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,54 @@
import argparse

import pytest

from fido.cli_args import build_parser, parse_args
from fido.cli_args import parse_cli_args
from fido.fido import defaults

# Common argument string
ARG_STRING = (
"-v -q -recurse -zip -noextension -nocontainer -pronom_only"
"-input files.txt"
"-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4"
)

ARG_STRING = (
"-v -q -recurse -zip -noextension -nocontainer -pronom_only"
"-input files.txt"
"-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4"
)


def test_build_parser():
parser = build_parser()
assert isinstance(parser, argparse.ArgumentParser)

# Check if all expected arguments are present
expected_args = ARG_STRING.split()
for arg in expected_args:
assert arg in parser._option_string_actions


def test_parse_args_valid():
parser = build_parser()

args = parse_args(parser.parse_args(ARG_STRING.split()))

assert args.v is True
assert args.q is True
assert args.recurse is True
assert args.zip is True
assert args.noextension is True
assert args.nocontainer is True
assert args.pronom_only is True
assert args.input == "input_file"
assert args.files == ["file1", "file2"]
assert args.filename == "filename"


def test_parse_args_input_valid():
    """Parse every boolean flag together with ``-input`` and the format filters.

    Verifies that each switch is set, that ``-input`` captures its file name,
    and that ``-useformats`` / ``-nouseformats`` accept the ``-opt=value`` form.
    """
    arg_string = (
        "-v -q -recurse -zip -noextension -nocontainer -pronom_only "
        "-input files.txt "
        "-useformats=fmt1,fmt2 -nouseformats=fmt3,fmt4"
    )
    args = parse_cli_args(arg_string.split(), defaults)

    # Switches are boolean flags: compare by identity so that an accidental
    # truthy non-bool value (e.g. a captured string) fails the test.
    assert args.v is True
    assert args.q is True
    assert args.recurse is True
    assert args.zip is True
    assert args.noextension is True
    assert args.nocontainer is True
    assert args.pronom_only is True

    assert args.input == "files.txt"
    assert args.useformats == "fmt1,fmt2"
    assert args.nouseformats == "fmt3,fmt4"


def test_parse_args_invalid(monkeypatch):
parser = build_parser()
def test_parse_args_files_valid():
    """Parse a command line that passes positional files instead of ``-input``.

    Verifies that the given switches are set, that switches not given stay
    ``False``, that the positional arguments land in ``args.files`` in order,
    and that the unspecified format filters are ``None``.
    """
    arg_string = "-q -zip file1.ext file2.ext"
    args = parse_cli_args(arg_string.split(), defaults)

    assert args.q is True
    assert args.zip is True

    # Identity comparison rather than ``== False``: 0, "" and [] must not
    # pass for a flag that is expected to be the boolean False.
    assert args.noextension is False
    assert args.nocontainer is False
    assert args.pronom_only is False

    assert args.files == ["file1.ext", "file2.ext"]
    assert args.useformats is None
    assert args.nouseformats is None

# Simulate invalid argument input
monkeypatch.setattr("sys.argv", ["prog", "--invalid"])

def test_parse_args_invalid():
arg_string = "-q -zip -bad_arg file1.ext file2.ext"
with pytest.raises(SystemExit):
parse_args(parser)
args = parse_cli_args(arg_string.split(), defaults)


# Simulate missing required argument
monkeypatch.setattr("sys.argv", ["prog", "-input"])
def test_parse_files_and_input_invalid():
arg_string = "-q -zip -input files.txt file1.ext file2.ext"
with pytest.raises(SystemExit):
parse_args(parser)
args = parse_cli_args(arg_string.split(), defaults)

0 comments on commit 630be3a

Please sign in to comment.