Commit 4d024d5

Fix typing-related mypy errors

aaronkollasch committed Aug 31, 2022
1 parent 7076eda commit 4d024d5
Showing 10 changed files with 118 additions and 62 deletions.
1 change: 1 addition & 0 deletions setup.cfg

@@ -166,6 +166,7 @@ exclude_lines =
     pragma: no cover
     if TYPE_CHECKING:
     raise NotImplementedError
+    return NotImplemented
     if __name__ == .__main__.:
     # typing-related code
     ^if (False|TYPE_CHECKING):
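
The new `return NotImplemented` entry extends coverage's `exclude_lines` patterns so defensive comparison fallbacks don't count as uncovered, matching the `__eq__` rewrite in database.py below. A minimal sketch of the kind of line this pattern excludes (the class is illustrative, not from this repository):

    class Point:
        def __init__(self, x: int, y: int) -> None:
            self.x = x
            self.y = y

        def __eq__(self, other: object) -> bool:
            if not isinstance(other, Point):
                return NotImplemented  # rarely exercised in tests; now excluded from coverage
            return (self.x, self.y) == (other.x, other.y)
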
13 changes: 11 additions & 2 deletions src/photomanager/actions/actions.py

@@ -6,7 +6,7 @@
 from collections.abc import Container, Iterable
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, TypedDict, Union

 from tqdm import tqdm

@@ -15,13 +15,22 @@
 from photomanager.photofile import PhotoFile


+class IndexResult(TypedDict):
+    changed_uids: set[str]
+    num_added_photos: int
+    num_merged_photos: int
+    num_skipped_photos: int
+    num_error_photos: int
+    photos: list[Optional[PhotoFile]]
+
+
 def index(
     database: Database,
     files: Iterable[Union[str, PathLike]],
     priority: int = 10,
     timezone_default: Optional[str] = None,
     storage_type: str = "HDD",
-) -> dict[str, Union[int, set[str], list[PhotoFile]]]:
+) -> IndexResult:
     """
     Index photo files and add them to the database.
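
Switching the return annotation from `dict[str, Union[int, set[str], list[PhotoFile]]]` to a `TypedDict` tells mypy the exact value type behind each key, so callers no longer need casts or narrowing. A minimal sketch of the difference (names are illustrative):

    from typing import TypedDict, Union

    class Stats(TypedDict):
        count: int
        names: set[str]

    loose: dict[str, Union[int, set[str]]] = {"count": 1, "names": {"a"}}
    n = loose["count"]   # mypy sees Union[int, set[str]]; arithmetic needs narrowing
    tight: Stats = {"count": 1, "names": {"a"}}
    m = tight["count"]   # mypy sees int; usable directly
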
27 changes: 14 additions & 13 deletions src/photomanager/actions/fileops.py

@@ -41,23 +41,23 @@ def list_files(
     :return: A dictionary with paths as keys.
     """
     logger = logging.getLogger(__name__)
-    paths = {Path(p).expanduser().resolve(): None for p in paths}
+    paths_resolved = {Path(p).expanduser().resolve(): None for p in paths}
     if source == "-":
         with click.open_file("-", "r") as f:
-            sources: str = f.read()
-        paths.update(
+            sources = f.read()
+        paths_resolved.update(
             {
                 Path(p).expanduser().resolve(): None
                 for p in sources.splitlines(keepends=False)
             }
         )
     elif source:
-        paths[Path(source).expanduser().resolve()] = None
+        paths_resolved[Path(source).expanduser().resolve()] = None

-    files = {}
+    files: dict[Path, None] = {}
     if file == "-":
         with click.open_file("-", "r") as f:
-            sources: str = f.read()
+            sources = f.read()
         files.update(
             {
                 Path(p).expanduser().resolve(): None
@@ -66,15 +66,15 @@ def list_files(
         )
     elif file:
         files[Path(file).expanduser().resolve()] = None
-    for path in paths:
+    for path in paths_resolved:
         if path.is_file():
             files[path] = None
         else:
             for p in path.glob("**/*.*"):
                 files[p] = None

     exclude_files = {Path(f).expanduser().resolve() for f in exclude_files}
-    filtered_files = {}
+    filtered_files: dict[str, None] = {}
     exclude_patterns = [re.compile(pat) for pat in set(exclude)]
     skipped_extensions = set()
     for p in files:
@@ -116,26 +116,27 @@ def index_photos(
     logger = logging.getLogger(__name__)
     if storage_type in ("SSD", "RAID"):
         async_hashes = True
-        async_exif = cpu_count()
+        async_exif = cpu_count() or 1
     else:
         # concurrent reads of sequential files can lead to thrashing
         async_hashes = False
         # exiftool is partially CPU-bound and benefits from async
-        async_exif = min(4, cpu_count())
+        async_exif = min(4, cpu_count() or 1)
     logger.info("Collecting media hashes")
+    files_normalized = [str(f) for f in files]
     checksum_cache = AsyncFileHasher(
         algorithm=hash_algorithm, use_async=async_hashes
-    ).check_files(files, pbar_unit="B")
+    ).check_files(files_normalized, pbar_unit="B")
     logger.info("Collecting media dates and times")
     datetime_cache = AsyncExifTool(num_workers=async_exif).get_best_datetime_batch(
-        files
+        files_normalized
     )

     logger.info("Indexing media")
     photos: list[Optional[PhotoFile]] = []
     exiftool = ExifTool()
     exiftool.start()
-    for current_file in tqdm(files):
+    for current_file in tqdm(files_normalized):
         if logger.isEnabledFor(logging.DEBUG):
             tqdm.write(f"Indexing {current_file}", file=sys.stderr)
         try:
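
Two recurring fixes here: `os.cpu_count()` is typed `Optional[int]` (it can return None), so `cpu_count() or 1` satisfies mypy and falls back to one worker; and converting the mixed `str`/`PathLike` input to `list[str]` once (`files_normalized`) gives the hashing, exiftool, and indexing passes a single concrete element type. A minimal sketch of both, assuming only standard-library behavior (`normalize` is an illustrative helper, not part of fileops):

    from collections.abc import Iterable
    from os import PathLike, cpu_count
    from typing import Union

    def normalize(files: Iterable[Union[str, PathLike]]) -> list[str]:
        # one concrete element type for every downstream consumer
        return [str(f) for f in files]

    workers: int = cpu_count() or 1  # Optional[int] -> int, with a safe fallback
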
5 changes: 4 additions & 1 deletion src/photomanager/actions/migrate.py

@@ -76,7 +76,10 @@ def make_hash_map(


 def map_hashes(
-    database: Database, new_algo: str, hash_map: dict[str, str], map_all: bool = False
+    database: Database,
+    new_algo: HashAlgorithm,
+    hash_map: dict[str, str],
+    map_all: bool = False,
 ) -> Optional[int]:  # pragma: no cover
     """Map the database's checksums to a new algorithm.
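
Typing `new_algo` as `HashAlgorithm` instead of `str` moves validation from runtime to the type checker; callers convert a raw CLI string once at the boundary, as cli.py already does with `HashAlgorithm(hash_algorithm)`. A minimal sketch of the pattern, assuming `HashAlgorithm` behaves like a standard `Enum` (the enum below is an illustrative stand-in, including its values):

    from enum import Enum

    class Algo(Enum):  # stand-in for photomanager's HashAlgorithm
        SHA256 = "sha256"
        BLAKE2B = "blake2b-256"

    def map_checksums(new_algo: Algo) -> None:
        print(f"mapping checksums to {new_algo.value}")

    map_checksums(Algo("sha256"))  # validate the string once, at the boundary
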
6 changes: 4 additions & 2 deletions src/photomanager/async_base.py

@@ -16,7 +16,7 @@

 def make_chunks(
     it: Iterable[T], size: int, init: Collection[T] = ()
-) -> Generator[list[T]]:
+) -> Generator[list[T], None, None]:
     chunk = list(init)
     for item in it:
         chunk.append(item)
@@ -37,7 +37,7 @@ def size(self) -> int:
 class AsyncWorkerQueue:
     def __init__(
         self,
-        num_workers: int = cpu_count(),
+        num_workers: int = cpu_count() or 1,
         show_progress: bool = False,
     ):
         self.num_workers: int = num_workers
@@ -73,6 +73,8 @@ def close_pbar(self):
     async def worker(self, worker_id: int):
         try:
             while True:
+                if self.queue is None:  # pragma: no cover
+                    break
                 job: AsyncJob = await self.queue.get()
                 try:
                     await self.do_job(worker_id, job)
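
`typing.Generator` takes three parameters, `Generator[YieldType, SendType, ReturnType]`, and mypy at the time of this commit rejects the one-argument form, hence `Generator[list[T], None, None]`. When the send and return types are unused, `Iterator[list[T]]` is an equivalent, shorter annotation. A minimal sketch:

    from collections.abc import Generator, Iterator

    def chunked(n: int) -> Generator[list[int], None, None]:
        for i in range(0, n, 2):
            yield [i, i + 1]

    def chunked_short(n: int) -> Iterator[list[int]]:  # equivalent when send/return are unused
        for i in range(0, n, 2):
            yield [i, i + 1]
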
14 changes: 7 additions & 7 deletions src/photomanager/cli.py

@@ -19,7 +19,7 @@
 except ImportError as e:
     check_files_message = str(e)

-    def check_files(*_, **__):
+    def check_files(*_, **__):  # type: ignore
         raise Exception("check-media-integrity not available: " + check_files_message)


@@ -135,16 +135,16 @@ def _index(
     config_logging(debug=debug)
     if db is not None:
         database = Database.from_file(db, create_new=True)
-        skip_existing = set(database.sources) if skip_existing else set()
+        exclude_files = set(database.sources) if skip_existing else set()
     else:
         database = Database()
-        skip_existing = set()
+        exclude_files = set()
     database.hash_algorithm = HashAlgorithm(hash_algorithm)
     filtered_files = fileops.list_files(
         source=source,
         file=file,
         exclude=exclude,
-        exclude_files=skip_existing,
+        exclude_files=exclude_files,
         paths=paths,
     )
     bad_files = None
@@ -163,7 +163,7 @@ def _index(
     photos = index_result["photos"]
     result = {}
     for filename, photo in zip(filtered_files, photos):
-        result[filename] = photo.to_dict()
+        result[filename] = photo.to_dict() if photo is not None else None
     print(json.dumps(result, indent=2))
     if db is not None and not dry_run:
         database.save(path=db, argv=sys.argv)
@@ -257,12 +257,12 @@ def _import(
 ):
     config_logging(debug=debug)
     database = Database.from_file(db, create_new=True)
-    skip_existing = set(database.sources) if skip_existing else set()
+    exclude_files = set(database.sources) if skip_existing else set()
     filtered_files = fileops.list_files(
         source=source,
         file=file,
         exclude=exclude,
-        exclude_files=skip_existing,
+        exclude_files=exclude_files,
         paths=paths,
     )
     bad_files = None
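
mypy binds one type per variable, so reusing the boolean `skip_existing` parameter to hold a `set[str]` is an assignment error; giving the set its own name, `exclude_files`, keeps both types stable. Likewise, `photos` is now `list[Optional[PhotoFile]]`, so each element must be narrowed before `.to_dict()` is called. A minimal sketch of both fixes (function and names are illustrative):

    from typing import Optional

    def report(flag: bool, names: list[str], maybe: Optional[str]) -> None:
        # flag = set(names) if flag else set()  # mypy: incompatible types in assignment
        selected = set(names) if flag else set()  # fresh name, stable types
        text = maybe.upper() if maybe is not None else None  # Optional narrowing
        print(selected, text)
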
18 changes: 10 additions & 8 deletions src/photomanager/database.py

@@ -4,7 +4,7 @@
 import logging
 import random
 import shlex
-from collections.abc import Container, Iterable
+from collections.abc import Container, Generator, Iterable
 from datetime import datetime, tzinfo
 from math import log
 from os import PathLike, cpu_count, makedirs, rename
@@ -102,7 +102,9 @@ def __init__(self):
         self.timestamp_to_uids: dict[float, dict[str, None]] = {}
         self._hash: int = hash(self)

-    def __eq__(self, other: DB) -> bool:
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Database):
+            return NotImplemented
         return self.db == other.db

     def __hash__(self) -> int:
@@ -149,7 +151,7 @@ def command_history(self) -> dict[str, str]:
         return self.db["command_history"]

     @property
-    def sources(self) -> str:
+    def sources(self) -> Generator[str, None, None]:
         for photos in self.photo_db.values():
             for photo in photos:
                 yield photo.src
@@ -294,8 +296,8 @@ def to_file(self, path: Union[str, PathLike], overwrite: bool = False) -> None:
                 f"{type(e).__name__} {e}"
             )
             try:
-                name, version = base_path.name.rsplit("_", 1)
-                version = int(version)
+                name, version_string = base_path.name.rsplit("_", 1)
+                version = int(version_string)
                 base_path = base_path.with_name(name + "_" + str(version + 1))
             except ValueError:
                 new_paths = list(
@@ -330,7 +332,7 @@ def to_file(self, path: Union[str, PathLike], overwrite: bool = False) -> None:
             cctx = zstd.ZstdCompressor(
                 level=7,
                 write_checksum=True,
-                threads=cpu_count(),
+                threads=cpu_count() or 1,
             )
             f.write(cctx.compress(save_bytes))
         else:
@@ -453,9 +455,9 @@ def add_photo(self, photo: PhotoFile, uid: Optional[str]) -> Optional[str]:
             return None
         if uid is None:
             if photo.chk in self.hash_to_uid:
-                uid: str = self.hash_to_uid[photo.chk]
+                uid = self.hash_to_uid[photo.chk]
             else:
-                uid: str = self.generate_uuid()
+                uid = self.generate_uuid()
         if uid in self.photo_db:
             photos = self.photo_db[uid]
             assert not any(photo.chk == p.chk and photo.src == p.src for p in photos)
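
The `__eq__` change is the canonical typed pattern: accept `object`, narrow with `isinstance`, and return `NotImplemented` for foreign types so Python can try the reflected comparison; it is also why `return NotImplemented` joined coverage's `exclude_lines` in setup.cfg above. The `rsplit` fix avoids rebinding one name (`version`) to both a `str` and an `int`, the same redefinition issue as in cli.py. A minimal sketch of the comparison pattern (the class is illustrative):

    class Tag:
        def __init__(self, name: str) -> None:
            self.name = name

        def __eq__(self, other: object) -> bool:
            if not isinstance(other, Tag):
                return NotImplemented  # let Python try the reflected comparison
            return self.name == other.name

        def __hash__(self) -> int:
            return hash(self.name)

    assert Tag("a") == Tag("a")
    assert Tag("a") != object()  # falls back through NotImplemented
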