Merge pull request #23 from aaronkollasch/dev

Add index --dump option
aaronkollasch · Aug 19, 2022 · 18800ca · 18800ca
2 parents a9c6db6 + abf71c9
commit 18800ca
Show file tree

Hide file tree

Showing 8 changed files with 206 additions and 72 deletions.
diff --git a/src/photomanager/actions/actions.py b/src/photomanager/actions/actions.py
@@ -65,6 +65,7 @@ def index(
         num_merged_photos=num_merged_photos,
         num_skipped_photos=num_skipped_photos,
         num_error_photos=num_error_photos,
+        photos=photos,
     )
 
 

diff --git a/src/photomanager/actions/fileops.py b/src/photomanager/actions/fileops.py
@@ -67,8 +67,11 @@ def list_files(
     elif file:
         files[Path(file).expanduser().resolve()] = None
     for path in paths:
-        for p in path.glob("**/*.*"):
-            files[p] = None
+        if path.is_file():
+            files[path] = None
+        else:
+            for p in path.glob("**/*.*"):
+                files[p] = None
 
     exclude_files = {Path(f).expanduser().resolve() for f in exclude_files}
     filtered_files = {}
@@ -131,7 +134,7 @@ def index_photos(
     exiftool.start()
     for current_file in tqdm(files):
         if logger.isEnabledFor(logging.DEBUG):
-            tqdm.write(f"Indexing {current_file}")
+            tqdm.write(f"Indexing {current_file}", file=sys.stderr)
         try:
             pf = PhotoFile.from_file_cached(
                 current_file,
@@ -186,7 +189,8 @@ def copy_photos(
         if logger.isEnabledFor(logging.DEBUG):
             tqdm.write(
                 f"{'Would copy' if dry_run else 'Copying'}: {photo.src} "
-                f"to {abs_store_path}"
+                f"to {abs_store_path}",
+                file=sys.stderr,
             )
         try:
             if not dry_run:
@@ -231,15 +235,16 @@ def remove_photos(
         if abs_store_path.exists():
             if logger.isEnabledFor(logging.DEBUG):
                 tqdm.write(
-                    f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}"
+                    f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}",
+                    file=sys.stderr,
                 )
             if not dry_run:
                 remove(abs_store_path)
                 photo.sto = ""
             num_removed_photos += 1
         else:
             if logger.isEnabledFor(logging.DEBUG):
-                tqdm.write(f"Missing photo: {abs_store_path}")
+                tqdm.write(f"Missing photo: {abs_store_path}", file=sys.stderr)
             num_missing_photos += 1
     return num_removed_photos, num_missing_photos
 

diff --git a/src/photomanager/cli.py b/src/photomanager/cli.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 from __future__ import annotations
 
+import json
 import logging
 import sys
 from os import PathLike
@@ -69,8 +70,7 @@ def _create(
 
 # fmt: off
 @click.command("index", help="Index and add items to database")
-@click.option("--db", type=click.Path(dir_okay=False), required=True,
-              default=DEFAULT_DB,
+@click.option("--db", type=click.Path(dir_okay=False),
               help="PhotoManager database filepath (.json). "
                    "Add extensions .zst or .gz to compress.")
 @click.option("--source", type=click.Path(file_okay=False),
@@ -86,34 +86,48 @@ def _create(
 @click.option("--timezone-default", type=str, default=None,
               help="Timezone to use when indexing timezone-naive photos "
                    "(example=\"-0400\", default=\"local\")")
+@click.option("--hash-algorithm",
+              type=click.Choice(HASH_ALGORITHMS),
+              default=DEFAULT_HASH_ALGO.value,
+              help=f"Hash algorithm to use if no database provided "
+                   f"(default={DEFAULT_HASH_ALGO.value})")
 @click.option("--storage-type", type=click.Choice(fileops.STORAGE_TYPES), default="HDD",
               help="Class of storage medium (HDD, SSD, RAID)")
 @click.option("--debug", default=False, is_flag=True,
               help="Run in debug mode")
+@click.option("--dump", default=False, is_flag=True,
+              help="Print photo info to stdout")
 @click.option("--dry-run", default=False, is_flag=True,
               help="Perform a dry run that makes no changes")
 @click.argument("paths", nargs=-1, type=click.Path())
 # fmt: on
 def _index(
-    db: Union[str, PathLike],
+    db: Union[str, PathLike] = None,
     source: Optional[Union[str, PathLike]] = None,
     file: Optional[Union[str, PathLike]] = None,
     paths: Iterable[Union[str, PathLike]] = tuple(),
     exclude: Iterable[str] = tuple(),
     skip_existing: bool = False,
     debug: bool = False,
     dry_run: bool = False,
+    dump: bool = False,
     priority: int = 10,
     timezone_default: Optional[str] = None,
+    hash_algorithm: str = DEFAULT_HASH_ALGO.value,
     storage_type: str = "HDD",
 ):
     if not source and not file and not paths:
         print("Nothing to index")
         print(click.get_current_context().get_help())
         click_exit(1)
     config_logging(debug=debug)
-    database = Database.from_file(db, create_new=True)
-    skip_existing = set(database.sources) if skip_existing else set()
+    if db is not None:
+        database = Database.from_file(db, create_new=True)
+        skip_existing = set(database.sources) if skip_existing else set()
+    else:
+        database = Database()
+        skip_existing = set()
+        database.hash_algorithm = HashAlgorithm(hash_algorithm)
     filtered_files = fileops.list_files(
         source=source,
         file=file,
@@ -128,7 +142,13 @@ def _index(
         timezone_default=timezone_default,
         storage_type=storage_type,
     )
-    if not dry_run:
+    if dump:
+        photos = index_result["photos"]
+        result = {}
+        for filename, photo in zip(filtered_files, photos):
+            result[filename] = photo.to_dict()
+        print(json.dumps(result, indent=2))
+    if db is not None and not dry_run:
         database.save(path=db, argv=sys.argv)
     click_exit(1 if index_result["num_error_photos"] else 0)
 

diff --git a/src/photomanager/database.py b/src/photomanager/database.py
@@ -62,7 +62,8 @@ def tz_str_to_tzinfo(tz: str):
     try:
         return datetime.strptime(tz, "%z").tzinfo
     except ValueError:
-        pass
+        logger = logging.getLogger(__name__)
+        logger.error(f"Could not parse timezone string: {tz}")
 
 
 class DatabaseException(PhotoManagerBaseException):

diff --git a/tests/integ_tests/test_cli.py b/tests/integ_tests/test_cli.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import subprocess
@@ -10,23 +11,16 @@
 
 from photomanager import cli, database, version
 
+from .test_photofile import PHOTOFILE_EXPECTED_RESULTS
+
 FIXTURE_DIR = Path(__file__).resolve().parent.parent / "test_files"
 ALL_IMG_DIRS = pytest.mark.datafiles(
     FIXTURE_DIR / "A",
     FIXTURE_DIR / "B",
     FIXTURE_DIR / "C",
     keep_top_dir=True,
 )
-EXPECTED_HASHES = {
-    "A/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
-    "A/img2.jpg": "3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666",
-    "A/img1.png": "1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9",
-    "A/img4.jpg": "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c",
-    "B/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
-    "B/img2.jpg": "e9fec87008fd240309b81c997e7ec5491fee8da7eb1a76fc39b8fcafa76bb583",
-    "B/img4.jpg": "2b0f304f86655ebd04272cc5e7e886e400b79a53ecfdc789f75dd380cbcc8317",
-    "C/img3.tiff": "2aca4e78afbcebf2526ad8ac544d90b92991faae22499eec45831ef7be392391",
-}
+EXPECTED_HASHES = {name: pf.chk for name, pf in PHOTOFILE_EXPECTED_RESULTS.items()}
 
 
 def check_dir_empty(dir_path):
@@ -498,14 +492,16 @@ def test_cli_import_no_overwrite(datafiles, caplog):
         check_dir_empty(fs)
 
 
-@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True)
-def test_cli_index_skip_existing(datafiles, caplog):
+@pytest.mark.datafiles(FIXTURE_DIR / "A", keep_top_dir=True)
+def test_cli_index_dump_skip_existing(datafiles, caplog):
     """
+    index --dump prints correct photofile json to stdout
     The --skip-existing flag prevents indexing existing source files
     """
     caplog.set_level(logging.DEBUG)
-    runner = CliRunner()
+    runner = CliRunner(mix_stderr=False)
     with runner.isolated_filesystem(temp_dir=datafiles) as fs:
+        print(os.listdir(datafiles / "A"))
         result = runner.invoke(
             cast(Group, cli.main),
             [
@@ -514,26 +510,59 @@ def test_cli_index_skip_existing(datafiles, caplog):
                 str(datafiles / "test.json"),
                 "--priority",
                 "10",
+                "--timezone-default",
+                "-0400",
                 "--debug",
-                str(datafiles / "C"),
+                "--dump",
+                str(datafiles / "A" / "img1.jpg"),
+                str(datafiles / "A" / "img1.png"),
             ],
         )
-        print("\nINDEX C")
-        print(result.output)
+        print("\nINDEX A")
+        print(result.stderr)
+        print(result.stdout)
         print(result)
         assert result.exit_code == 0
-        assert "Indexed 1/1 items" in caplog.messages
-        assert "Added 1 new items and merged 0 items" in caplog.messages
+        assert "Indexed 2/2 items" in caplog.messages
+        assert "Added 2 new items and merged 0 items" in caplog.messages
 
         with open(datafiles / "test.json", "rb") as f:
             s = f.read()
             db = database.Database.from_json(s)
         print(db.json)
-        assert sum(1 for _ in db.sources) == 1
-        assert set(db.sources) == {str(datafiles / "C" / "img3.tiff")}
-
-        with open(datafiles / "C" / "newphoto.jpg", "wb") as f:
-            f.write(b"contents")
+        assert sum(1 for _ in db.sources) == 2
+        assert set(db.sources) == {
+            str(datafiles / "A" / "img1.jpg"),
+            str(datafiles / "A" / "img1.png"),
+        }
+        assert json.loads(result.stdout) == {
+            str(datafiles / "A" / "img1.jpg"): {
+                "chk": (
+                    "d090ce7023b57925e7e94fc80372e343"
+                    "4fb1897e00b4452a25930dd1b83648fb"
+                ),
+                "src": str(datafiles / "A" / "img1.jpg"),
+                "dt": "2015:08:01 18:28:36.90",
+                "ts": 1438468116.9,
+                "fsz": 771,
+                "sto": "",
+                "prio": 10,
+                "tzo": -14400.0,
+            },
+            str(datafiles / "A" / "img1.png"): {
+                "chk": (
+                    "1e10df2e3abe4c810551525b6cb2eb80"
+                    "5886de240e04cc7c13c58ae208cabfb9"
+                ),
+                "src": str(datafiles / "A" / "img1.png"),
+                "dt": "2015:08:01 18:28:36.90",
+                "ts": 1438468116.9,
+                "fsz": 382,
+                "sto": "",
+                "prio": 10,
+                "tzo": -14400.0,
+            },
+        }
 
         result = runner.invoke(
             cast(Group, cli.main),
@@ -543,28 +572,96 @@ def test_cli_index_skip_existing(datafiles, caplog):
                 str(datafiles / "test.json"),
                 "--priority",
                 "10",
+                "--timezone-default",
+                "-0400",
                 "--skip-existing",
                 "--debug",
+                "--dry-run",
+                "--dump",
+                str(datafiles / "A"),
+            ],
+        )
+        print("\nINDEX A dump skip-existing")
+        print(result.stderr)
+        print(result.stdout)
+        print(result)
+        assert result.exit_code == 0
+        assert "Indexed 2/2 items" in caplog.messages
+        assert "Added 2 new items and merged 0 items" in caplog.messages
+        assert json.loads(result.stdout) == {
+            str(datafiles / "A" / "img4.jpg"): {
+                "chk": (
+                    "79ac4a89fb3d81ab1245b21b11ff7512"
+                    "495debca60f6abf9afbb1e1fbfe9d98c"
+                ),
+                "src": str(datafiles / "A" / "img4.jpg"),
+                "dt": "2018:08:01 20:28:36",
+                "ts": 1533169716.0,
+                "fsz": 759,
+                "sto": "",
+                "prio": 10,
+                "tzo": -14400.0,
+            },
+            str(datafiles / "A" / "img2.jpg"): {
+                "chk": (
+                    "3b39f47d51f63e54c76417ee6e04c34b"
+                    "d3ff5ac47696824426dca9e200f03666"
+                ),
+                "src": str(datafiles / "A" / "img2.jpg"),
+                "dt": "2015:08:01 18:28:36.99",
+                "ts": 1438468116.99,
+                "fsz": 771,
+                "sto": "",
+                "prio": 10,
+                "tzo": -14400.0,
+            },
+        }
+
+        check_dir_empty(fs)
+
+
+@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True)
+def test_cli_index_dump_no_database(datafiles, caplog):
+    caplog.set_level(logging.DEBUG)
+    runner = CliRunner(mix_stderr=False)
+    with runner.isolated_filesystem(temp_dir=datafiles) as fs:
+        result = runner.invoke(
+            cast(Group, cli.main),
+            [
+                "index",
+                "--priority",
+                "10",
+                "--timezone-default",
+                "+0100",
+                "--skip-existing",
+                "--debug",
+                "--dump",
                 str(datafiles / "C"),
             ],
         )
-        print("\nINDEX C skip-existing")
-        print(result.output)
+        print("\nINDEX C")
+        print(result.stderr)
+        print(result.stdout)
         print(result)
         assert result.exit_code == 0
         assert "Indexed 1/1 items" in caplog.messages
         assert "Added 1 new items and merged 0 items" in caplog.messages
-
-        with open(datafiles / "test.json", "rb") as f:
-            s = f.read()
-            db = database.Database.from_json(s)
-        print(db.json)
-        assert sum(1 for _ in db.sources) == 2
-        assert set(db.sources) == {
-            str(datafiles / "C" / "img3.tiff"),
-            str(datafiles / "C" / "newphoto.jpg"),
+        assert json.loads(result.stdout) == {
+            str(datafiles / "C" / "img3.tiff"): {
+                "chk": (
+                    "2aca4e78afbcebf2526ad8ac544d90b9"
+                    "2991faae22499eec45831ef7be392391"
+                ),
+                "src": str(datafiles / "C" / "img3.tiff"),
+                "dt": "2018:08:01 19:28:36",
+                "ts": 1533148116.0,
+                "fsz": 506,
+                "sto": "",
+                "prio": 10,
+                "tzo": 3600.0,
+            }
         }
-
+        print("\n".join(str(p) for p in Path(datafiles).glob("**/*")))
         check_dir_empty(fs)