Skip to content

Commit

Permalink
Merge pull request #23 from aaronkollasch/dev
Browse files Browse the repository at this point in the history
Add index --dump option
  • Loading branch information
aaronkollasch authored Aug 19, 2022
2 parents a9c6db6 + abf71c9 commit 18800ca
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 72 deletions.
1 change: 1 addition & 0 deletions src/photomanager/actions/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def index(
num_merged_photos=num_merged_photos,
num_skipped_photos=num_skipped_photos,
num_error_photos=num_error_photos,
photos=photos,
)


Expand Down
17 changes: 11 additions & 6 deletions src/photomanager/actions/fileops.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,11 @@ def list_files(
elif file:
files[Path(file).expanduser().resolve()] = None
for path in paths:
for p in path.glob("**/*.*"):
files[p] = None
if path.is_file():
files[path] = None
else:
for p in path.glob("**/*.*"):
files[p] = None

exclude_files = {Path(f).expanduser().resolve() for f in exclude_files}
filtered_files = {}
Expand Down Expand Up @@ -131,7 +134,7 @@ def index_photos(
exiftool.start()
for current_file in tqdm(files):
if logger.isEnabledFor(logging.DEBUG):
tqdm.write(f"Indexing {current_file}")
tqdm.write(f"Indexing {current_file}", file=sys.stderr)
try:
pf = PhotoFile.from_file_cached(
current_file,
Expand Down Expand Up @@ -186,7 +189,8 @@ def copy_photos(
if logger.isEnabledFor(logging.DEBUG):
tqdm.write(
f"{'Would copy' if dry_run else 'Copying'}: {photo.src} "
f"to {abs_store_path}"
f"to {abs_store_path}",
file=sys.stderr,
)
try:
if not dry_run:
Expand Down Expand Up @@ -231,15 +235,16 @@ def remove_photos(
if abs_store_path.exists():
if logger.isEnabledFor(logging.DEBUG):
tqdm.write(
f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}"
f"{'Would remove' if dry_run else 'Removing'}: {abs_store_path}",
file=sys.stderr,
)
if not dry_run:
remove(abs_store_path)
photo.sto = ""
num_removed_photos += 1
else:
if logger.isEnabledFor(logging.DEBUG):
tqdm.write(f"Missing photo: {abs_store_path}")
tqdm.write(f"Missing photo: {abs_store_path}", file=sys.stderr)
num_missing_photos += 1
return num_removed_photos, num_missing_photos

Expand Down
32 changes: 26 additions & 6 deletions src/photomanager/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
from __future__ import annotations

import json
import logging
import sys
from os import PathLike
Expand Down Expand Up @@ -69,8 +70,7 @@ def _create(

# fmt: off
@click.command("index", help="Index and add items to database")
@click.option("--db", type=click.Path(dir_okay=False), required=True,
default=DEFAULT_DB,
@click.option("--db", type=click.Path(dir_okay=False),
help="PhotoManager database filepath (.json). "
"Add extensions .zst or .gz to compress.")
@click.option("--source", type=click.Path(file_okay=False),
Expand All @@ -86,34 +86,48 @@ def _create(
@click.option("--timezone-default", type=str, default=None,
help="Timezone to use when indexing timezone-naive photos "
"(example=\"-0400\", default=\"local\")")
@click.option("--hash-algorithm",
type=click.Choice(HASH_ALGORITHMS),
default=DEFAULT_HASH_ALGO.value,
help=f"Hash algorithm to use if no database provided "
f"(default={DEFAULT_HASH_ALGO.value})")
@click.option("--storage-type", type=click.Choice(fileops.STORAGE_TYPES), default="HDD",
help="Class of storage medium (HDD, SSD, RAID)")
@click.option("--debug", default=False, is_flag=True,
help="Run in debug mode")
@click.option("--dump", default=False, is_flag=True,
help="Print photo info to stdout")
@click.option("--dry-run", default=False, is_flag=True,
help="Perform a dry run that makes no changes")
@click.argument("paths", nargs=-1, type=click.Path())
# fmt: on
def _index(
db: Union[str, PathLike],
db: Union[str, PathLike] = None,
source: Optional[Union[str, PathLike]] = None,
file: Optional[Union[str, PathLike]] = None,
paths: Iterable[Union[str, PathLike]] = tuple(),
exclude: Iterable[str] = tuple(),
skip_existing: bool = False,
debug: bool = False,
dry_run: bool = False,
dump: bool = False,
priority: int = 10,
timezone_default: Optional[str] = None,
hash_algorithm: str = DEFAULT_HASH_ALGO.value,
storage_type: str = "HDD",
):
if not source and not file and not paths:
print("Nothing to index")
print(click.get_current_context().get_help())
click_exit(1)
config_logging(debug=debug)
database = Database.from_file(db, create_new=True)
skip_existing = set(database.sources) if skip_existing else set()
if db is not None:
database = Database.from_file(db, create_new=True)
skip_existing = set(database.sources) if skip_existing else set()
else:
database = Database()
skip_existing = set()
database.hash_algorithm = HashAlgorithm(hash_algorithm)
filtered_files = fileops.list_files(
source=source,
file=file,
Expand All @@ -128,7 +142,13 @@ def _index(
timezone_default=timezone_default,
storage_type=storage_type,
)
if not dry_run:
if dump:
photos = index_result["photos"]
result = {}
for filename, photo in zip(filtered_files, photos):
result[filename] = photo.to_dict()
print(json.dumps(result, indent=2))
if db is not None and not dry_run:
database.save(path=db, argv=sys.argv)
click_exit(1 if index_result["num_error_photos"] else 0)

Expand Down
3 changes: 2 additions & 1 deletion src/photomanager/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def tz_str_to_tzinfo(tz: str):
try:
return datetime.strptime(tz, "%z").tzinfo
except ValueError:
pass
logger = logging.getLogger(__name__)
logger.error(f"Could not parse timezone string: {tz}")


class DatabaseException(PhotoManagerBaseException):
Expand Down
167 changes: 132 additions & 35 deletions tests/integ_tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os
import subprocess
Expand All @@ -10,23 +11,16 @@

from photomanager import cli, database, version

from .test_photofile import PHOTOFILE_EXPECTED_RESULTS

FIXTURE_DIR = Path(__file__).resolve().parent.parent / "test_files"
ALL_IMG_DIRS = pytest.mark.datafiles(
FIXTURE_DIR / "A",
FIXTURE_DIR / "B",
FIXTURE_DIR / "C",
keep_top_dir=True,
)
EXPECTED_HASHES = {
"A/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
"A/img2.jpg": "3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666",
"A/img1.png": "1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9",
"A/img4.jpg": "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c",
"B/img1.jpg": "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
"B/img2.jpg": "e9fec87008fd240309b81c997e7ec5491fee8da7eb1a76fc39b8fcafa76bb583",
"B/img4.jpg": "2b0f304f86655ebd04272cc5e7e886e400b79a53ecfdc789f75dd380cbcc8317",
"C/img3.tiff": "2aca4e78afbcebf2526ad8ac544d90b92991faae22499eec45831ef7be392391",
}
EXPECTED_HASHES = {name: pf.chk for name, pf in PHOTOFILE_EXPECTED_RESULTS.items()}


def check_dir_empty(dir_path):
Expand Down Expand Up @@ -498,14 +492,16 @@ def test_cli_import_no_overwrite(datafiles, caplog):
check_dir_empty(fs)


@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True)
def test_cli_index_skip_existing(datafiles, caplog):
@pytest.mark.datafiles(FIXTURE_DIR / "A", keep_top_dir=True)
def test_cli_index_dump_skip_existing(datafiles, caplog):
"""
index --dump prints correct photofile json to stdout
The --skip-existing flag prevents indexing existing source files
"""
caplog.set_level(logging.DEBUG)
runner = CliRunner()
runner = CliRunner(mix_stderr=False)
with runner.isolated_filesystem(temp_dir=datafiles) as fs:
print(os.listdir(datafiles / "A"))
result = runner.invoke(
cast(Group, cli.main),
[
Expand All @@ -514,26 +510,59 @@ def test_cli_index_skip_existing(datafiles, caplog):
str(datafiles / "test.json"),
"--priority",
"10",
"--timezone-default",
"-0400",
"--debug",
str(datafiles / "C"),
"--dump",
str(datafiles / "A" / "img1.jpg"),
str(datafiles / "A" / "img1.png"),
],
)
print("\nINDEX C")
print(result.output)
print("\nINDEX A")
print(result.stderr)
print(result.stdout)
print(result)
assert result.exit_code == 0
assert "Indexed 1/1 items" in caplog.messages
assert "Added 1 new items and merged 0 items" in caplog.messages
assert "Indexed 2/2 items" in caplog.messages
assert "Added 2 new items and merged 0 items" in caplog.messages

with open(datafiles / "test.json", "rb") as f:
s = f.read()
db = database.Database.from_json(s)
print(db.json)
assert sum(1 for _ in db.sources) == 1
assert set(db.sources) == {str(datafiles / "C" / "img3.tiff")}

with open(datafiles / "C" / "newphoto.jpg", "wb") as f:
f.write(b"contents")
assert sum(1 for _ in db.sources) == 2
assert set(db.sources) == {
str(datafiles / "A" / "img1.jpg"),
str(datafiles / "A" / "img1.png"),
}
assert json.loads(result.stdout) == {
str(datafiles / "A" / "img1.jpg"): {
"chk": (
"d090ce7023b57925e7e94fc80372e343"
"4fb1897e00b4452a25930dd1b83648fb"
),
"src": str(datafiles / "A" / "img1.jpg"),
"dt": "2015:08:01 18:28:36.90",
"ts": 1438468116.9,
"fsz": 771,
"sto": "",
"prio": 10,
"tzo": -14400.0,
},
str(datafiles / "A" / "img1.png"): {
"chk": (
"1e10df2e3abe4c810551525b6cb2eb80"
"5886de240e04cc7c13c58ae208cabfb9"
),
"src": str(datafiles / "A" / "img1.png"),
"dt": "2015:08:01 18:28:36.90",
"ts": 1438468116.9,
"fsz": 382,
"sto": "",
"prio": 10,
"tzo": -14400.0,
},
}

result = runner.invoke(
cast(Group, cli.main),
Expand All @@ -543,28 +572,96 @@ def test_cli_index_skip_existing(datafiles, caplog):
str(datafiles / "test.json"),
"--priority",
"10",
"--timezone-default",
"-0400",
"--skip-existing",
"--debug",
"--dry-run",
"--dump",
str(datafiles / "A"),
],
)
print("\nINDEX A dump skip-existing")
print(result.stderr)
print(result.stdout)
print(result)
assert result.exit_code == 0
assert "Indexed 2/2 items" in caplog.messages
assert "Added 2 new items and merged 0 items" in caplog.messages
assert json.loads(result.stdout) == {
str(datafiles / "A" / "img4.jpg"): {
"chk": (
"79ac4a89fb3d81ab1245b21b11ff7512"
"495debca60f6abf9afbb1e1fbfe9d98c"
),
"src": str(datafiles / "A" / "img4.jpg"),
"dt": "2018:08:01 20:28:36",
"ts": 1533169716.0,
"fsz": 759,
"sto": "",
"prio": 10,
"tzo": -14400.0,
},
str(datafiles / "A" / "img2.jpg"): {
"chk": (
"3b39f47d51f63e54c76417ee6e04c34b"
"d3ff5ac47696824426dca9e200f03666"
),
"src": str(datafiles / "A" / "img2.jpg"),
"dt": "2015:08:01 18:28:36.99",
"ts": 1438468116.99,
"fsz": 771,
"sto": "",
"prio": 10,
"tzo": -14400.0,
},
}

check_dir_empty(fs)


@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True)
def test_cli_index_dump_no_database(datafiles, caplog):
caplog.set_level(logging.DEBUG)
runner = CliRunner(mix_stderr=False)
with runner.isolated_filesystem(temp_dir=datafiles) as fs:
result = runner.invoke(
cast(Group, cli.main),
[
"index",
"--priority",
"10",
"--timezone-default",
"+0100",
"--skip-existing",
"--debug",
"--dump",
str(datafiles / "C"),
],
)
print("\nINDEX C skip-existing")
print(result.output)
print("\nINDEX C")
print(result.stderr)
print(result.stdout)
print(result)
assert result.exit_code == 0
assert "Indexed 1/1 items" in caplog.messages
assert "Added 1 new items and merged 0 items" in caplog.messages

with open(datafiles / "test.json", "rb") as f:
s = f.read()
db = database.Database.from_json(s)
print(db.json)
assert sum(1 for _ in db.sources) == 2
assert set(db.sources) == {
str(datafiles / "C" / "img3.tiff"),
str(datafiles / "C" / "newphoto.jpg"),
assert json.loads(result.stdout) == {
str(datafiles / "C" / "img3.tiff"): {
"chk": (
"2aca4e78afbcebf2526ad8ac544d90b9"
"2991faae22499eec45831ef7be392391"
),
"src": str(datafiles / "C" / "img3.tiff"),
"dt": "2018:08:01 19:28:36",
"ts": 1533148116.0,
"fsz": 506,
"sto": "",
"prio": 10,
"tzo": 3600.0,
}
}

print("\n".join(str(p) for p in Path(datafiles).glob("**/*")))
check_dir_empty(fs)


Expand Down
Loading

0 comments on commit 18800ca

Please sign in to comment.