From 1ee841375b01d4a0051f6a7b2d9f2c2db444767a Mon Sep 17 00:00:00 2001 From: Aaron Kollasch Date: Thu, 18 Aug 2022 19:01:07 -0400 Subject: [PATCH 1/4] Allow files in PATHS, not just directories --- src/photomanager/actions/fileops.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/photomanager/actions/fileops.py b/src/photomanager/actions/fileops.py index c7da2b1..3df10d0 100644 --- a/src/photomanager/actions/fileops.py +++ b/src/photomanager/actions/fileops.py @@ -67,8 +67,11 @@ def list_files( elif file: files[Path(file).expanduser().resolve()] = None for path in paths: - for p in path.glob("**/*.*"): - files[p] = None + if path.is_file(): + files[path] = None + else: + for p in path.glob("**/*.*"): + files[p] = None exclude_files = {Path(f).expanduser().resolve() for f in exclude_files} filtered_files = {} From d75d8bbc56648e0bfdede7c516320371a8c959e5 Mon Sep 17 00:00:00 2001 From: Aaron Kollasch Date: Thu, 18 Aug 2022 19:55:46 -0400 Subject: [PATCH 2/4] Send additional logs to stderr instead of stdout --- src/photomanager/actions/fileops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/photomanager/actions/fileops.py b/src/photomanager/actions/fileops.py index 3df10d0..50981a2 100644 --- a/src/photomanager/actions/fileops.py +++ b/src/photomanager/actions/fileops.py @@ -134,7 +134,7 @@ def index_photos( exiftool.start() for current_file in tqdm(files): if logger.isEnabledFor(logging.DEBUG): - tqdm.write(f"Indexing {current_file}") + tqdm.write(f"Indexing {current_file}", file=sys.stderr) try: pf = PhotoFile.from_file_cached( current_file, @@ -189,7 +189,8 @@ def copy_photos( if logger.isEnabledFor(logging.DEBUG): tqdm.write( f"{'Would copy' if dry_run else 'Copying'}: {photo.src} " - f"to {abs_store_path}" + f"to {abs_store_path}", + file=sys.stderr, ) try: if not dry_run: @@ -234,7 +235,8 @@ def remove_photos( if abs_store_path.exists(): if logger.isEnabledFor(logging.DEBUG): tqdm.write( - 
f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}" + f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}", + file=sys.stderr, ) if not dry_run: remove(abs_store_path) @@ -242,7 +244,7 @@ def remove_photos( num_removed_photos += 1 else: if logger.isEnabledFor(logging.DEBUG): - tqdm.write(f"Missing photo: {abs_store_path}") + tqdm.write(f"Missing photo: {abs_store_path}", file=sys.stderr) num_missing_photos += 1 return num_removed_photos, num_missing_photos From 486380866de4e6f549824791530dab604e8f7593 Mon Sep 17 00:00:00 2001 From: Aaron Kollasch Date: Thu, 18 Aug 2022 19:57:40 -0400 Subject: [PATCH 3/4] Add index --dump option Prints indexed photo info to stdout Signed-off-by: Aaron Kollasch --- src/photomanager/actions/actions.py | 1 + src/photomanager/cli.py | 32 +++++- tests/integ_tests/test_cli.py | 167 ++++++++++++++++++++++------ tests/integ_tests/test_photofile.py | 22 ++-- tests/unit_tests/test_cli.py | 18 ++- 5 files changed, 182 insertions(+), 58 deletions(-) diff --git a/src/photomanager/actions/actions.py b/src/photomanager/actions/actions.py index 20d2284..d4a1555 100644 --- a/src/photomanager/actions/actions.py +++ b/src/photomanager/actions/actions.py @@ -65,6 +65,7 @@ def index( num_merged_photos=num_merged_photos, num_skipped_photos=num_skipped_photos, num_error_photos=num_error_photos, + photos=photos, ) diff --git a/src/photomanager/cli.py b/src/photomanager/cli.py index a0d9184..73c5ad7 100755 --- a/src/photomanager/cli.py +++ b/src/photomanager/cli.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import annotations +import json import logging import sys from os import PathLike @@ -69,8 +70,7 @@ def _create( # fmt: off @click.command("index", help="Index and add items to database") -@click.option("--db", type=click.Path(dir_okay=False), required=True, - default=DEFAULT_DB, +@click.option("--db", type=click.Path(dir_okay=False), help="PhotoManager database filepath (.json). 
" "Add extensions .zst or .gz to compress.") @click.option("--source", type=click.Path(file_okay=False), @@ -86,16 +86,23 @@ def _create( @click.option("--timezone-default", type=str, default=None, help="Timezone to use when indexing timezone-naive photos " "(example=\"-0400\", default=\"local\")") +@click.option("--hash-algorithm", + type=click.Choice(HASH_ALGORITHMS), + default=DEFAULT_HASH_ALGO.value, + help=f"Hash algorithm to use if no database provided " + f"(default={DEFAULT_HASH_ALGO.value})") @click.option("--storage-type", type=click.Choice(fileops.STORAGE_TYPES), default="HDD", help="Class of storage medium (HDD, SSD, RAID)") @click.option("--debug", default=False, is_flag=True, help="Run in debug mode") +@click.option("--dump", default=False, is_flag=True, + help="Print photo info to stdout") @click.option("--dry-run", default=False, is_flag=True, help="Perform a dry run that makes no changes") @click.argument("paths", nargs=-1, type=click.Path()) # fmt: on def _index( - db: Union[str, PathLike], + db: Union[str, PathLike] = None, source: Optional[Union[str, PathLike]] = None, file: Optional[Union[str, PathLike]] = None, paths: Iterable[Union[str, PathLike]] = tuple(), @@ -103,8 +110,10 @@ def _index( skip_existing: bool = False, debug: bool = False, dry_run: bool = False, + dump: bool = False, priority: int = 10, timezone_default: Optional[str] = None, + hash_algorithm: str = DEFAULT_HASH_ALGO.value, storage_type: str = "HDD", ): if not source and not file and not paths: @@ -112,8 +121,13 @@ def _index( print(click.get_current_context().get_help()) click_exit(1) config_logging(debug=debug) - database = Database.from_file(db, create_new=True) - skip_existing = set(database.sources) if skip_existing else set() + if db is not None: + database = Database.from_file(db, create_new=True) + skip_existing = set(database.sources) if skip_existing else set() + else: + database = Database() + skip_existing = set() + database.hash_algorithm = 
HashAlgorithm(hash_algorithm) filtered_files = fileops.list_files( source=source, file=file, @@ -128,7 +142,13 @@ def _index( timezone_default=timezone_default, storage_type=storage_type, ) - if not dry_run: + if dump: + photos = index_result["photos"] + result = {} + for filename, photo in zip(filtered_files, photos): + result[filename] = photo.to_dict() + print(json.dumps(result, indent=2)) + if db is not None and not dry_run: database.save(path=db, argv=sys.argv) click_exit(1 if index_result["num_error_photos"] else 0) diff --git a/tests/integ_tests/test_cli.py b/tests/integ_tests/test_cli.py index fc18d04..289704b 100644 --- a/tests/integ_tests/test_cli.py +++ b/tests/integ_tests/test_cli.py @@ -1,3 +1,4 @@ +import json import logging import os import subprocess @@ -10,6 +11,8 @@ from photomanager import cli, database, version +from .test_photofile import PHOTOFILE_EXPECTED_RESULTS + FIXTURE_DIR = Path(__file__).resolve().parent.parent / "test_files" ALL_IMG_DIRS = pytest.mark.datafiles( FIXTURE_DIR / "A", @@ -17,16 +20,7 @@ FIXTURE_DIR / "C", keep_top_dir=True, ) -EXPECTED_HASHES = { - "A/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb", - "A/img2.jpg": "3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666", - "A/img1.png": "1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9", - "A/img4.jpg": "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c", - "B/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb", - "B/img2.jpg": "e9fec87008fd240309b81c997e7ec5491fee8da7eb1a76fc39b8fcafa76bb583", - "B/img4.jpg": "2b0f304f86655ebd04272cc5e7e886e400b79a53ecfdc789f75dd380cbcc8317", - "C/img3.tiff": "2aca4e78afbcebf2526ad8ac544d90b92991faae22499eec45831ef7be392391", -} +EXPECTED_HASHES = {name: pf.chk for name, pf in PHOTOFILE_EXPECTED_RESULTS.items()} def check_dir_empty(dir_path): @@ -498,14 +492,16 @@ def test_cli_import_no_overwrite(datafiles, caplog): check_dir_empty(fs) 
-@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True) -def test_cli_index_skip_existing(datafiles, caplog): +@pytest.mark.datafiles(FIXTURE_DIR / "A", keep_top_dir=True) +def test_cli_index_dump_skip_existing(datafiles, caplog): """ + index --dump prints correct photofile json to stdout The --skip-existing flag prevents indexing existing source files """ caplog.set_level(logging.DEBUG) - runner = CliRunner() + runner = CliRunner(mix_stderr=False) with runner.isolated_filesystem(temp_dir=datafiles) as fs: + print(os.listdir(datafiles / "A")) result = runner.invoke( cast(Group, cli.main), [ @@ -514,26 +510,59 @@ def test_cli_index_skip_existing(datafiles, caplog): str(datafiles / "test.json"), "--priority", "10", + "--timezone-default", + "-0400", "--debug", - str(datafiles / "C"), + "--dump", + str(datafiles / "A" / "img1.jpg"), + str(datafiles / "A" / "img1.png"), ], ) - print("\nINDEX C") - print(result.output) + print("\nINDEX A") + print(result.stderr) + print(result.stdout) print(result) assert result.exit_code == 0 - assert "Indexed 1/1 items" in caplog.messages - assert "Added 1 new items and merged 0 items" in caplog.messages + assert "Indexed 2/2 items" in caplog.messages + assert "Added 2 new items and merged 0 items" in caplog.messages with open(datafiles / "test.json", "rb") as f: s = f.read() db = database.Database.from_json(s) print(db.json) - assert sum(1 for _ in db.sources) == 1 - assert set(db.sources) == {str(datafiles / "C" / "img3.tiff")} - - with open(datafiles / "C" / "newphoto.jpg", "wb") as f: - f.write(b"contents") + assert sum(1 for _ in db.sources) == 2 + assert set(db.sources) == { + str(datafiles / "A" / "img1.jpg"), + str(datafiles / "A" / "img1.png"), + } + assert json.loads(result.stdout) == { + str(datafiles / "A" / "img1.jpg"): { + "chk": ( + "d090ce7023b57925e7e94fc80372e343" + "4fb1897e00b4452a25930dd1b83648fb" + ), + "src": str(datafiles / "A" / "img1.jpg"), + "dt": "2015:08:01 18:28:36.90", + "ts": 1438468116.9, + 
"fsz": 771, + "sto": "", + "prio": 10, + "tzo": -14400.0, + }, + str(datafiles / "A" / "img1.png"): { + "chk": ( + "1e10df2e3abe4c810551525b6cb2eb80" + "5886de240e04cc7c13c58ae208cabfb9" + ), + "src": str(datafiles / "A" / "img1.png"), + "dt": "2015:08:01 18:28:36.90", + "ts": 1438468116.9, + "fsz": 382, + "sto": "", + "prio": 10, + "tzo": -14400.0, + }, + } result = runner.invoke( cast(Group, cli.main), @@ -543,28 +572,96 @@ def test_cli_index_skip_existing(datafiles, caplog): str(datafiles / "test.json"), "--priority", "10", + "--timezone-default", + "-0400", "--skip-existing", "--debug", + "--dry-run", + "--dump", + str(datafiles / "A"), + ], + ) + print("\nINDEX A dump skip-existing") + print(result.stderr) + print(result.stdout) + print(result) + assert result.exit_code == 0 + assert "Indexed 2/2 items" in caplog.messages + assert "Added 2 new items and merged 0 items" in caplog.messages + assert json.loads(result.stdout) == { + str(datafiles / "A" / "img4.jpg"): { + "chk": ( + "79ac4a89fb3d81ab1245b21b11ff7512" + "495debca60f6abf9afbb1e1fbfe9d98c" + ), + "src": str(datafiles / "A" / "img4.jpg"), + "dt": "2018:08:01 20:28:36", + "ts": 1533169716.0, + "fsz": 759, + "sto": "", + "prio": 10, + "tzo": -14400.0, + }, + str(datafiles / "A" / "img2.jpg"): { + "chk": ( + "3b39f47d51f63e54c76417ee6e04c34b" + "d3ff5ac47696824426dca9e200f03666" + ), + "src": str(datafiles / "A" / "img2.jpg"), + "dt": "2015:08:01 18:28:36.99", + "ts": 1438468116.99, + "fsz": 771, + "sto": "", + "prio": 10, + "tzo": -14400.0, + }, + } + + check_dir_empty(fs) + + +@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True) +def test_cli_index_dump_no_database(datafiles, caplog): + caplog.set_level(logging.DEBUG) + runner = CliRunner(mix_stderr=False) + with runner.isolated_filesystem(temp_dir=datafiles) as fs: + result = runner.invoke( + cast(Group, cli.main), + [ + "index", + "--priority", + "10", + "--timezone-default", + "+0100", + "--skip-existing", + "--debug", + "--dump", 
str(datafiles / "C"), ], ) - print("\nINDEX C skip-existing") - print(result.output) + print("\nINDEX C") + print(result.stderr) + print(result.stdout) print(result) assert result.exit_code == 0 assert "Indexed 1/1 items" in caplog.messages assert "Added 1 new items and merged 0 items" in caplog.messages - - with open(datafiles / "test.json", "rb") as f: - s = f.read() - db = database.Database.from_json(s) - print(db.json) - assert sum(1 for _ in db.sources) == 2 - assert set(db.sources) == { - str(datafiles / "C" / "img3.tiff"), - str(datafiles / "C" / "newphoto.jpg"), + assert json.loads(result.stdout) == { + str(datafiles / "C" / "img3.tiff"): { + "chk": ( + "2aca4e78afbcebf2526ad8ac544d90b9" + "2991faae22499eec45831ef7be392391" + ), + "src": str(datafiles / "C" / "img3.tiff"), + "dt": "2018:08:01 19:28:36", + "ts": 1533148116.0, + "fsz": 506, + "sto": "", + "prio": 10, + "tzo": 3600.0, + } } - + print("\n".join(str(p) for p in Path(datafiles).glob("**/*"))) check_dir_empty(fs) diff --git a/tests/integ_tests/test_photofile.py b/tests/integ_tests/test_photofile.py index 02b18f1..9c625f0 100644 --- a/tests/integ_tests/test_photofile.py +++ b/tests/integ_tests/test_photofile.py @@ -13,8 +13,8 @@ FIXTURE_DIR / "C", keep_top_dir=True, ) -photofile_expected_results = [ - PhotoFile( +PHOTOFILE_EXPECTED_RESULTS = { + "A/img1.jpg": PhotoFile( chk="d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb", src="A/img1.jpg", dt="2015:08:01 18:28:36.90", @@ -22,7 +22,7 @@ fsz=771, tzo=-14400.0, ), - PhotoFile( + "A/img2.jpg": PhotoFile( chk="3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666", src="A/img2.jpg", dt="2015:08:01 18:28:36.99", @@ -30,7 +30,7 @@ fsz=771, tzo=3600.0, ), - PhotoFile( + "A/img1.png": PhotoFile( chk="1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9", src="A/img1.png", dt="2015:08:01 18:28:36.90", @@ -38,7 +38,7 @@ fsz=382, tzo=0.0, ), - PhotoFile( + "A/img4.jpg": PhotoFile( 
chk="79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c", src="A/img4.jpg", dt="2018:08:01 20:28:36", @@ -46,7 +46,7 @@ fsz=759, tzo=-14400.0, ), - PhotoFile( + "B/img1.jpg": PhotoFile( chk="d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb", src="B/img1.jpg", dt="2015:08:01 18:28:36.90", @@ -54,7 +54,7 @@ fsz=771, tzo=-14400.0, ), - PhotoFile( + "B/img2.jpg": PhotoFile( chk="e9fec87008fd240309b81c997e7ec5491fee8da7eb1a76fc39b8fcafa76bb583", src="B/img2.jpg", dt="2015:08:01 18:28:36.99", @@ -62,7 +62,7 @@ fsz=789, tzo=-14400.0, ), - PhotoFile( + "B/img4.jpg": PhotoFile( chk="2b0f304f86655ebd04272cc5e7e886e400b79a53ecfdc789f75dd380cbcc8317", src="B/img4.jpg", dt="2018:08:01 20:28:36", @@ -70,7 +70,7 @@ fsz=777, tzo=-14400.0, ), - PhotoFile( + "C/img3.tiff": PhotoFile( chk="2aca4e78afbcebf2526ad8ac544d90b92991faae22499eec45831ef7be392391", src="C/img3.tiff", dt="2018:08:01 19:28:36", @@ -78,13 +78,13 @@ fsz=506, tzo=-14400.0, ), -] +} @ALL_IMG_DIRS def test_photofile_from_file(datafiles): with ExifTool(): - for pf in photofile_expected_results: + for pf in PHOTOFILE_EXPECTED_RESULTS.values(): pf = PhotoFile.from_dict(pf.to_dict()) rel_path = pf.src pf.src = str(datafiles / rel_path) diff --git a/tests/unit_tests/test_cli.py b/tests/unit_tests/test_cli.py index 5cc5b56..c752519 100644 --- a/tests/unit_tests/test_cli.py +++ b/tests/unit_tests/test_cli.py @@ -126,9 +126,9 @@ def test_cli_index_nothing(tmpdir, caplog): check_dir_empty(fs) -def test_cli_index_collect_no_db(tmpdir, caplog): +def test_cli_create_collect_no_db(tmpdir, caplog): """ - If index is run with no db argument, db is created at DEFAULT_DB + If create is run with no db argument, db is created at DEFAULT_DB """ caplog.set_level(logging.DEBUG) CliRunner.isolated_filesystem(tmpdir) @@ -140,10 +140,7 @@ result = runner.invoke( cast(Group, cli.main), [ - "index", - "--priority", - "10", - str(tmpdir), + "create", ], )
print("\nINDEX no-db") @@ -278,6 +275,9 @@ def test_cli_import_no_changes(tmpdir, caplog): def test_cli_collect_no_db(tmpdir, caplog): + """ + collect must be given a database to collect to + """ caplog.set_level(logging.DEBUG) CliRunner.isolated_filesystem(tmpdir) runner = CliRunner() @@ -298,6 +298,9 @@ def test_cli_collect_no_db(tmpdir, caplog): def test_cli_verify_wrong_storage_type(tmpdir, caplog): + """ + verify does not accept a nonexistent storage-type + """ caplog.set_level(logging.DEBUG) CliRunner.isolated_filesystem(tmpdir) runner = CliRunner() @@ -316,6 +319,9 @@ def test_cli_verify_wrong_storage_type(tmpdir, caplog): def test_cli_verify_random_sample(tmpdir, caplog): + """ + verify random-fraction checks a fraction of the database + """ caplog.set_level(logging.DEBUG) CliRunner.isolated_filesystem(tmpdir) runner = CliRunner() From abf71c95f908da06b0791fdf56c09d397bd4c80f Mon Sep 17 00:00:00 2001 From: Aaron Kollasch Date: Thu, 18 Aug 2022 19:58:18 -0400 Subject: [PATCH 4/4] Add error message if timezone string was not parsed --- src/photomanager/database.py | 3 ++- tests/unit_tests/test_datetime.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/photomanager/database.py b/src/photomanager/database.py index e8ae446..58586b3 100644 --- a/src/photomanager/database.py +++ b/src/photomanager/database.py @@ -62,7 +62,8 @@ def tz_str_to_tzinfo(tz: str): try: return datetime.strptime(tz, "%z").tzinfo except ValueError: - pass + logger = logging.getLogger(__name__) + logger.error(f"Could not parse timezone string: {tz}") class DatabaseException(PhotoManagerBaseException): diff --git a/tests/unit_tests/test_datetime.py b/tests/unit_tests/test_datetime.py index 3b69eb8..0bc63ca 100644 --- a/tests/unit_tests/test_datetime.py +++ b/tests/unit_tests/test_datetime.py @@ -227,16 +227,20 @@ def test_datetime_to_timestamp_errors(): expected_tzinfo = [ - ("local", None), - ("-0400", datetime.timezone(datetime.timedelta(days=-1, 
seconds=72000))), - ("Z", datetime.timezone.utc), - ("+1000", datetime.timezone(datetime.timedelta(seconds=36000))), - ("wrong", None), + ("local", None, False), + ("-0400", datetime.timezone(datetime.timedelta(days=-1, seconds=72000)), False), + ("Z", datetime.timezone.utc, False), + ("+1000", datetime.timezone(datetime.timedelta(seconds=36000)), False), + ("wrong", None, True), ] -@pytest.mark.parametrize("tz_str,tz_info", expected_tzinfo) -def test_database_timezone_default(tz_str, tz_info): +@pytest.mark.parametrize("tz_str,tz_info,error", expected_tzinfo) +def test_database_timezone_default(tz_str, tz_info, error, caplog): db = Database() db._db["timezone_default"] = tz_str assert db.timezone_default == tz_info + if error: + assert caplog.messages + else: + assert not caplog.messages