Commit 4d024d5

Fix typing-related mypy errors

aaronkollasch committed Aug 31, 2022
1 parent 7076eda commit 4d024d5
Showing 10 changed files with 118 additions and 62 deletions.
1 change: 1 addition & 0 deletions setup.cfg

@@ -166,6 +166,7 @@ exclude_lines =
     pragma: no cover
     if TYPE_CHECKING:
     raise NotImplementedError
+    return NotImplemented
     if __name__ == .__main__.:
     # typing-related code
     ^if (False|TYPE_CHECKING):
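
The new `return NotImplemented` entry extends coverage's `exclude_lines` patterns so defensive comparison fallbacks don't count as uncovered, matching the `__eq__` rewrite in database.py below. A minimal sketch of the kind of line this pattern excludes (the class is illustrative, not from this repository):

    class Point:
        def __init__(self, x: int, y: int) -> None:
            self.x = x
            self.y = y

        def __eq__(self, other: object) -> bool:
            if not isinstance(other, Point):
                return NotImplemented  # rarely exercised in tests; now excluded from coverage
            return (self.x, self.y) == (other.x, other.y)
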
13 changes: 11 additions & 2 deletions src/photomanager/actions/actions.py

@@ -6,7 +6,7 @@
 from collections.abc import Container, Iterable
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, TypedDict, Union

 from tqdm import tqdm

@@ -15,13 +15,22 @@
 from photomanager.photofile import PhotoFile


+class IndexResult(TypedDict):
+    changed_uids: set[str]
+    num_added_photos: int
+    num_merged_photos: int
+    num_skipped_photos: int
+    num_error_photos: int
+    photos: list[Optional[PhotoFile]]
+
+
 def index(
     database: Database,
     files: Iterable[Union[str, PathLike]],
     priority: int = 10,
     timezone_default: Optional[str] = None,
     storage_type: str = "HDD",
-) -> dict[str, Union[int, set[str], list[PhotoFile]]]:
+) -> IndexResult:
     """
     Index photo files and add them to the database.
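
Switching the return annotation from `dict[str, Union[int, set[str], list[PhotoFile]]]` to a `TypedDict` tells mypy the exact value type behind each key, so callers no longer need casts or narrowing. A minimal sketch of the difference (names are illustrative):

    from typing import TypedDict, Union

    class Stats(TypedDict):
        count: int
        names: set[str]

    loose: dict[str, Union[int, set[str]]] = {"count": 1, "names": {"a"}}
    n = loose["count"]   # mypy sees Union[int, set[str]]; arithmetic needs narrowing
    tight: Stats = {"count": 1, "names": {"a"}}
    m = tight["count"]   # mypy sees int; usable directly
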
27 changes: 14 additions & 13 deletions src/photomanager/actions/fileops.py

@@ -41,23 +41,23 @@ def list_files(
     :return: A dictionary with paths as keys.
     """
     logger = logging.getLogger(__name__)
-    paths = {Path(p).expanduser().resolve(): None for p in paths}
+    paths_resolved = {Path(p).expanduser().resolve(): None for p in paths}
     if source == "-":
         with click.open_file("-", "r") as f:
-            sources: str = f.read()
-        paths.update(
+            sources = f.read()
+        paths_resolved.update(
             {
                 Path(p).expanduser().resolve(): None
                 for p in sources.splitlines(keepends=False)
             }
         )
     elif source:
-        paths[Path(source).expanduser().resolve()] = None
+        paths_resolved[Path(source).expanduser().resolve()] = None

-    files = {}
+    files: dict[Path, None] = {}
     if file == "-":
         with click.open_file("-", "r") as f:
-            sources: str = f.read()
+            sources = f.read()
         files.update(
             {
                 Path(p).expanduser().resolve(): None
@@ -66,15 +66,15 @@ def list_files(
         )
     elif file:
         files[Path(file).expanduser().resolve()] = None
-    for path in paths:
+    for path in paths_resolved:
         if path.is_file():
             files[path] = None
         else:
             for p in path.glob("**/*.*"):
                 files[p] = None

     exclude_files = {Path(f).expanduser().resolve() for f in exclude_files}
-    filtered_files = {}
+    filtered_files: dict[str, None] = {}
     exclude_patterns = [re.compile(pat) for pat in set(exclude)]
     skipped_extensions = set()
     for p in files:
@@ -116,26 +116,27 @@ def index_photos(
     logger = logging.getLogger(__name__)
     if storage_type in ("SSD", "RAID"):
         async_hashes = True
-        async_exif = cpu_count()
+        async_exif = cpu_count() or 1
     else:
         # concurrent reads of sequential files can lead to thrashing
         async_hashes = False
         # exiftool is partially CPU-bound and benefits from async
-        async_exif = min(4, cpu_count())
+        async_exif = min(4, cpu_count() or 1)
     logger.info("Collecting media hashes")
+    files_normalized = [str(f) for f in files]
     checksum_cache = AsyncFileHasher(
         algorithm=hash_algorithm, use_async=async_hashes
-    ).check_files(files, pbar_unit="B")
+    ).check_files(files_normalized, pbar_unit="B")
     logger.info("Collecting media dates and times")
     datetime_cache = AsyncExifTool(num_workers=async_exif).get_best_datetime_batch(
-        files
+        files_normalized
     )

     logger.info("Indexing media")
     photos: list[Optional[PhotoFile]] = []
     exiftool = ExifTool()
     exiftool.start()
-    for current_file in tqdm(files):
+    for current_file in tqdm(files_normalized):
         if logger.isEnabledFor(logging.DEBUG):
             tqdm.write(f"Indexing {current_file}", file=sys.stderr)
         try:
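
Two recurring fixes here: `os.cpu_count()` is typed `Optional[int]` (it can return None), so `cpu_count() or 1` satisfies mypy and falls back to one worker; and converting the mixed `str`/`PathLike` input to `list[str]` once (`files_normalized`) gives the hashing, exiftool, and indexing passes a single concrete element type. A minimal sketch of both, assuming only standard-library behavior (`normalize` is an illustrative helper, not part of fileops):

    from collections.abc import Iterable
    from os import PathLike, cpu_count
    from typing import Union

    def normalize(files: Iterable[Union[str, PathLike]]) -> list[str]:
        # one concrete element type for every downstream consumer
        return [str(f) for f in files]

    workers: int = cpu_count() or 1  # Optional[int] -> int, with a safe fallback
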
5 changes: 4 additions & 1 deletion src/photomanager/actions/migrate.py

@@ -76,7 +76,10 @@ def make_hash_map(


 def map_hashes(
-    database: Database, new_algo: str, hash_map: dict[str, str], map_all: bool = False
+    database: Database,
+    new_algo: HashAlgorithm,
+    hash_map: dict[str, str],
+    map_all: bool = False,
 ) -> Optional[int]:  # pragma: no cover
     """Map the database's checksums to a new algorithm.
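
Typing `new_algo` as `HashAlgorithm` instead of `str` moves validation from runtime to the type checker; callers convert a raw CLI string once at the boundary, as cli.py already does with `HashAlgorithm(hash_algorithm)`. A minimal sketch of the pattern, assuming `HashAlgorithm` behaves like a standard `Enum` (the enum below is an illustrative stand-in, including its values):

    from enum import Enum

    class Algo(Enum):  # stand-in for photomanager's HashAlgorithm
        SHA256 = "sha256"
        BLAKE2B = "blake2b-256"

    def map_checksums(new_algo: Algo) -> None:
        print(f"mapping checksums to {new_algo.value}")

    map_checksums(Algo("sha256"))  # validate the string once, at the boundary
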
6 changes: 4 additions & 2 deletions src/photomanager/async_base.py

@@ -16,7 +16,7 @@

 def make_chunks(
     it: Iterable[T], size: int, init: Collection[T] = ()
-) -> Generator[list[T]]:
+) -> Generator[list[T], None, None]:
     chunk = list(init)
     for item in it:
         chunk.append(item)
@@ -37,7 +37,7 @@ def size(self) -> int:
 class AsyncWorkerQueue:
     def __init__(
         self,
-        num_workers: int = cpu_count(),
+        num_workers: int = cpu_count() or 1,
         show_progress: bool = False,
     ):
         self.num_workers: int = num_workers
@@ -73,6 +73,8 @@ def close_pbar(self):
     async def worker(self, worker_id: int):
         try:
             while True:
+                if self.queue is None:  # pragma: no cover
+                    break
                 job: AsyncJob = await self.queue.get()
                 try:
                     await self.do_job(worker_id, job)
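
`typing.Generator` takes three parameters, `Generator[YieldType, SendType, ReturnType]`, and mypy at the time of this commit rejects the one-argument form, hence `Generator[list[T], None, None]`. When the send and return types are unused, `Iterator[list[T]]` is an equivalent, shorter annotation. A minimal sketch:

    from collections.abc import Generator, Iterator

    def chunked(n: int) -> Generator[list[int], None, None]:
        for i in range(0, n, 2):
            yield [i, i + 1]

    def chunked_short(n: int) -> Iterator[list[int]]:  # equivalent when send/return are unused
        for i in range(0, n, 2):
            yield [i, i + 1]
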
14 changes: 7 additions & 7 deletions src/photomanager/cli.py

@@ -19,7 +19,7 @@
 except ImportError as e:
     check_files_message = str(e)

-    def check_files(*_, **__):
+    def check_files(*_, **__):  # type: ignore
         raise Exception("check-media-integrity not available: " + check_files_message)


@@ -135,16 +135,16 @@ def _index(
     config_logging(debug=debug)
     if db is not None:
         database = Database.from_file(db, create_new=True)
-        skip_existing = set(database.sources) if skip_existing else set()
+        exclude_files = set(database.sources) if skip_existing else set()
     else:
         database = Database()
-        skip_existing = set()
+        exclude_files = set()
     database.hash_algorithm = HashAlgorithm(hash_algorithm)
     filtered_files = fileops.list_files(
         source=source,
         file=file,
         exclude=exclude,
-        exclude_files=skip_existing,
+        exclude_files=exclude_files,
         paths=paths,
     )
     bad_files = None
@@ -163,7 +163,7 @@ def _index(
     photos = index_result["photos"]
     result = {}
     for filename, photo in zip(filtered_files, photos):
-        result[filename] = photo.to_dict()
+        result[filename] = photo.to_dict() if photo is not None else None
     print(json.dumps(result, indent=2))
     if db is not None and not dry_run:
         database.save(path=db, argv=sys.argv)
@@ -257,12 +257,12 @@ def _import(
 ):
     config_logging(debug=debug)
     database = Database.from_file(db, create_new=True)
-    skip_existing = set(database.sources) if skip_existing else set()
+    exclude_files = set(database.sources) if skip_existing else set()
     filtered_files = fileops.list_files(
         source=source,
         file=file,
         exclude=exclude,
-        exclude_files=skip_existing,
+        exclude_files=exclude_files,
         paths=paths,
     )
     bad_files = None
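
mypy binds one type per variable, so reusing the boolean `skip_existing` parameter to hold a `set[str]` is an assignment error; giving the set its own name, `exclude_files`, keeps both types stable. Likewise, `photos` is now `list[Optional[PhotoFile]]`, so each element must be narrowed before `.to_dict()` is called. A minimal sketch of both fixes (function and names are illustrative):

    from typing import Optional

    def report(flag: bool, names: list[str], maybe: Optional[str]) -> None:
        # flag = set(names) if flag else set()  # mypy: incompatible types in assignment
        selected = set(names) if flag else set()  # fresh name, stable types
        text = maybe.upper() if maybe is not None else None  # Optional narrowing
        print(selected, text)
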
18 changes: 10 additions & 8 deletions src/photomanager/database.py

@@ -4,7 +4,7 @@
 import logging
 import random
 import shlex
-from collections.abc import Container, Iterable
+from collections.abc import Container, Generator, Iterable
 from datetime import datetime, tzinfo
 from math import log
 from os import PathLike, cpu_count, makedirs, rename
@@ -102,7 +102,9 @@ def __init__(self):
         self.timestamp_to_uids: dict[float, dict[str, None]] = {}
         self._hash: int = hash(self)

-    def __eq__(self, other: DB) -> bool:
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Database):
+            return NotImplemented
         return self.db == other.db

     def __hash__(self) -> int:
@@ -149,7 +151,7 @@ def command_history(self) -> dict[str, str]:
         return self.db["command_history"]

     @property
-    def sources(self) -> str:
+    def sources(self) -> Generator[str, None, None]:
         for photos in self.photo_db.values():
             for photo in photos:
                 yield photo.src
@@ -294,8 +296,8 @@ def to_file(self, path: Union[str, PathLike], overwrite: bool = False) -> None:
                 f"{type(e).__name__} {e}"
             )
             try:
-                name, version = base_path.name.rsplit("_", 1)
-                version = int(version)
+                name, version_string = base_path.name.rsplit("_", 1)
+                version = int(version_string)
                 base_path = base_path.with_name(name + "_" + str(version + 1))
             except ValueError:
                 new_paths = list(
@@ -330,7 +332,7 @@ def to_file(self, path: Union[str, PathLike], overwrite: bool = False) -> None:
             cctx = zstd.ZstdCompressor(
                 level=7,
                 write_checksum=True,
-                threads=cpu_count(),
+                threads=cpu_count() or 1,
             )
             f.write(cctx.compress(save_bytes))
         else:
@@ -453,9 +455,9 @@ def add_photo(self, photo: PhotoFile, uid: Optional[str]) -> Optional[str]:
             return None
         if uid is None:
             if photo.chk in self.hash_to_uid:
-                uid: str = self.hash_to_uid[photo.chk]
+                uid = self.hash_to_uid[photo.chk]
             else:
-                uid: str = self.generate_uuid()
+                uid = self.generate_uuid()
         if uid in self.photo_db:
             photos = self.photo_db[uid]
             assert not any(photo.chk == p.chk and photo.src == p.src for p in photos)
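
The `__eq__` change is the canonical typed pattern: accept `object`, narrow with `isinstance`, and return `NotImplemented` for foreign types so Python can try the reflected comparison; it is also why `return NotImplemented` joined coverage's `exclude_lines` in setup.cfg above. The `rsplit` fix avoids rebinding one name (`version`) to both a `str` and an `int`, the same redefinition issue as in cli.py. A minimal sketch of the comparison pattern (the class is illustrative):

    class Tag:
        def __init__(self, name: str) -> None:
            self.name = name

        def __eq__(self, other: object) -> bool:
            if not isinstance(other, Tag):
                return NotImplemented  # let Python try the reflected comparison
            return self.name == other.name

        def __hash__(self) -> int:
            return hash(self.name)

    assert Tag("a") == Tag("a")
    assert Tag("a") != object()  # falls back through NotImplemented
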