Skip to content

Commit

Permalink
Add mypy to linters (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronkollasch authored Nov 13, 2022
2 parents 0021a84 + bef9e8c commit 1dde211
Show file tree
Hide file tree
Showing 17 changed files with 284 additions and 109 deletions.
13 changes: 12 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ write_to_template = '''version = "{version}"
[tool.black]
line-length = 88
target-version = ['py38']
include = '^/(src|tests|benchmarks)/.*\.pyi?$'
include = '^/(src|tests|benchmarks|stubs)/.*\.pyi?$'
extend-exclude = '''
# A regex preceded with ^/ will apply only to files and directories
# in the root of the project.
Expand Down Expand Up @@ -43,3 +43,14 @@ markers = [
filterwarnings = [
"ignore:.*the imp module is deprecated.*:DeprecationWarning",
]

[tool.mypy]
ignore_missing_imports = true
warn_return_any = true
mypy_path = [
"src",
"stubs",
]
exclude = [
"photomanager/check_media_integrity/test_damage[.]py$",
]
4 changes: 4 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ flake8-black
coverage
twine
build
mypy
types-pillow
types-tqdm
types-xxhash
12 changes: 9 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ project_urls =
Source = https://github.com/aaronkollasch/photomanager
Tracker = https://github.com/aaronkollasch/photomanager/issues
license = MIT
license_file = LICENSE
keywords = photo video manager archive duplicate async checksum database
platforms = any
classifiers =
Expand Down Expand Up @@ -45,6 +44,7 @@ scripts =
where = src
exclude =
tests
stubs

[options.entry_points]
console_scripts =
Expand Down Expand Up @@ -83,9 +83,9 @@ requires = tox-conda; platform_system=="Darwin"

[gh-actions]
python =
3.8: py38, linters
3.8: py38,
3.9: py39,
3.10: py310,
3.10: py310, linters

[testenv]
# install pytest in the virtualenv where commands will be executed
Expand All @@ -108,10 +108,15 @@ deps =
isort
flake8
flake8-black
mypy
types-pillow
types-tqdm
types-xxhash
commands =
black --check --diff .
isort --check --diff .
flake8 --count src tests benchmarks
mypy src

[testenv:twine]
deps =
Expand Down Expand Up @@ -167,6 +172,7 @@ exclude_lines =
pragma: no cover
if TYPE_CHECKING:
raise NotImplementedError
return NotImplemented
if __name__ == .__main__.:
# typing-related code
^if (False|TYPE_CHECKING):
Expand Down
13 changes: 11 additions & 2 deletions src/photomanager/actions/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from collections.abc import Container, Iterable
from os import PathLike
from pathlib import Path
from typing import Optional, Union
from typing import Optional, TypedDict, Union

from tqdm import tqdm

Expand All @@ -15,13 +15,22 @@
from photomanager.photofile import PhotoFile


class IndexResult(TypedDict):
    """Typed summary returned by index() (replaces the former
    dict[str, Union[int, set[str], list[PhotoFile]]] return annotation).
    """

    # UIDs touched by this indexing run — presumably those added or merged;
    # confirm against the index() body.
    changed_uids: set[str]
    # Number of photos newly added to the database.
    num_added_photos: int
    # Number of photos merged into existing database entries.
    num_merged_photos: int
    # Number of photos skipped during indexing.
    num_skipped_photos: int
    # Number of photos that failed to index.
    num_error_photos: int
    # One entry per indexed file; None marks a file that produced no PhotoFile
    # (callers such as the CLI guard with `if photo is not None`).
    photos: list[Optional[PhotoFile]]


def index(
database: Database,
files: Iterable[Union[str, PathLike]],
priority: int = 10,
timezone_default: Optional[str] = None,
storage_type: str = "HDD",
) -> dict[str, Union[int, set[str], list[PhotoFile]]]:
) -> IndexResult:
"""
Index photo files and add them to the database.
Expand Down
27 changes: 14 additions & 13 deletions src/photomanager/actions/fileops.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,23 @@ def list_files(
:return: A dictionary with paths as keys.
"""
logger = logging.getLogger(__name__)
paths = {Path(p).expanduser().resolve(): None for p in paths}
paths_resolved = {Path(p).expanduser().resolve(): None for p in paths}
if source == "-":
with click.open_file("-", "r") as f:
sources: str = f.read()
paths.update(
sources = f.read()
paths_resolved.update(
{
Path(p).expanduser().resolve(): None
for p in sources.splitlines(keepends=False)
}
)
elif source:
paths[Path(source).expanduser().resolve()] = None
paths_resolved[Path(source).expanduser().resolve()] = None

files = {}
files: dict[Path, None] = {}
if file == "-":
with click.open_file("-", "r") as f:
sources: str = f.read()
sources = f.read()
files.update(
{
Path(p).expanduser().resolve(): None
Expand All @@ -66,15 +66,15 @@ def list_files(
)
elif file:
files[Path(file).expanduser().resolve()] = None
for path in paths:
for path in paths_resolved:
if path.is_file():
files[path] = None
else:
for p in path.glob("**/*.*"):
files[p] = None

exclude_files = {Path(f).expanduser().resolve() for f in exclude_files}
filtered_files = {}
filtered_files: dict[str, None] = {}
exclude_patterns = [re.compile(pat) for pat in set(exclude)]
skipped_extensions = set()
for p in files:
Expand Down Expand Up @@ -116,26 +116,27 @@ def index_photos(
logger = logging.getLogger(__name__)
if storage_type in ("SSD", "RAID"):
async_hashes = True
async_exif = cpu_count()
async_exif = cpu_count() or 1
else:
# concurrent reads of sequential files can lead to thrashing
async_hashes = False
# exiftool is partially CPU-bound and benefits from async
async_exif = min(4, cpu_count())
async_exif = min(4, cpu_count() or 1)
logger.info("Collecting media hashes")
files_normalized = [str(f) for f in files]
checksum_cache = AsyncFileHasher(
algorithm=hash_algorithm, use_async=async_hashes
).check_files(files, pbar_unit="B")
).check_files(files_normalized, pbar_unit="B")
logger.info("Collecting media dates and times")
datetime_cache = AsyncExifTool(num_workers=async_exif).get_best_datetime_batch(
files
files_normalized
)

logger.info("Indexing media")
photos: list[Optional[PhotoFile]] = []
exiftool = ExifTool()
exiftool.start()
for current_file in tqdm(files):
for current_file in tqdm(files_normalized):
if logger.isEnabledFor(logging.DEBUG):
tqdm.write(f"Indexing {current_file}", file=sys.stderr)
try:
Expand Down
5 changes: 4 additions & 1 deletion src/photomanager/actions/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ def make_hash_map(


def map_hashes(
database: Database, new_algo: str, hash_map: dict[str, str], map_all: bool = False
database: Database,
new_algo: HashAlgorithm,
hash_map: dict[str, str],
map_all: bool = False,
) -> Optional[int]: # pragma: no cover
"""Map the database's checksums to a new algorithm.
Expand Down
10 changes: 6 additions & 4 deletions src/photomanager/async_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

def make_chunks(
it: Iterable[T], size: int, init: Collection[T] = ()
) -> Generator[list[T]]:
) -> Generator[list[T], None, None]:
chunk = list(init)
for item in it:
chunk.append(item)
Expand All @@ -37,7 +37,7 @@ def size(self) -> int:
class AsyncWorkerQueue:
def __init__(
self,
num_workers: int = cpu_count(),
num_workers: int = cpu_count() or 1,
show_progress: bool = False,
):
self.num_workers: int = num_workers
Expand All @@ -61,10 +61,10 @@ async def do_job(self, worker_id: int, job: AsyncJob):
raise NotImplementedError

def make_pbar(self, all_jobs: Collection[AsyncJob]):
raise NotImplementedError
self.pbar = tqdm(total=sum(job.size for job in all_jobs))

def update_pbar(self, job: AsyncJob):
raise NotImplementedError
self.pbar.update(n=job.size) if self.pbar else None

def close_pbar(self):
if self.pbar is not None:
Expand All @@ -73,6 +73,8 @@ def close_pbar(self):
async def worker(self, worker_id: int):
try:
while True:
if self.queue is None: # pragma: no cover
break
job: AsyncJob = await self.queue.get()
try:
await self.do_job(worker_id, job)
Expand Down
2 changes: 1 addition & 1 deletion src/photomanager/check_media_integrity
37 changes: 24 additions & 13 deletions src/photomanager/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
except ImportError as e:
check_files_message = str(e)

def check_files(*_, **__):
# Fallback stub bound when the check-media-integrity extra fails to import;
# it accepts any arguments so call sites stay valid, and raises with the
# captured ImportError text (check_files_message) when actually invoked.
def check_files(*_, **__): # type: ignore
    """Raise immediately: the check-media-integrity feature is unavailable."""
    raise Exception("check-media-integrity not available: " + check_files_message)


Expand Down Expand Up @@ -113,7 +113,7 @@ def _create(
@click.argument("paths", nargs=-1, type=click.Path())
# fmt: on
def _index(
db: Union[str, PathLike] = None,
db: Optional[Union[str, PathLike]] = None,
source: Optional[Union[str, PathLike]] = None,
file: Optional[Union[str, PathLike]] = None,
paths: Iterable[Union[str, PathLike]] = tuple(),
Expand All @@ -135,16 +135,16 @@ def _index(
config_logging(debug=debug)
if db is not None:
database = Database.from_file(db, create_new=True)
skip_existing = set(database.sources) if skip_existing else set()
exclude_files = set(database.sources) if skip_existing else set()
else:
database = Database()
skip_existing = set()
exclude_files = set()
database.hash_algorithm = HashAlgorithm(hash_algorithm)
filtered_files = fileops.list_files(
source=source,
file=file,
exclude=exclude,
exclude_files=skip_existing,
exclude_files=exclude_files,
paths=paths,
)
bad_files = None
Expand All @@ -163,7 +163,7 @@ def _index(
photos = index_result["photos"]
result = {}
for filename, photo in zip(filtered_files, photos):
result[filename] = photo.to_dict()
result[filename] = photo.to_dict() if photo is not None else None
print(json.dumps(result, indent=2))
if db is not None and not dry_run:
database.save(path=db, argv=sys.argv)
Expand Down Expand Up @@ -257,12 +257,12 @@ def _import(
):
config_logging(debug=debug)
database = Database.from_file(db, create_new=True)
skip_existing = set(database.sources) if skip_existing else set()
exclude_files = set(database.sources) if skip_existing else set()
filtered_files = fileops.list_files(
source=source,
file=file,
exclude=exclude,
exclude_files=skip_existing,
exclude_files=exclude_files,
paths=paths,
)
bad_files = None
Expand Down Expand Up @@ -371,13 +371,24 @@ def _verify(

# fmt: off
@click.command("stats", help="Get database statistics")
@click.option("--db", type=click.Path(dir_okay=False, exists=True), required=True,
default=DEFAULT_DB, help="PhotoManager database path")
@click.argument("database", type=click.Path(dir_okay=False, exists=True),
required=False, default=None)
@click.option("--db", type=click.Path(dir_okay=False, exists=True), required=False,
default=None, help="PhotoManager database path")
# fmt: on
def _stats(db: Union[str, PathLike]):
def _stats(
database: Optional[Union[str, PathLike]],
db: Optional[Union[str, PathLike]],
):
config_logging()
database = Database.from_file(db)
num_uids, num_photos, num_stored_photos, total_file_size = database.get_stats()
if database is not None:
db_path = database
elif db is not None:
db_path = db
else:
raise click.BadArgumentUsage("Database path not provided.")
my_db = Database.from_file(db_path)
num_uids, num_photos, num_stored_photos, total_file_size = my_db.get_stats()
print(f"Total items: {num_photos}")
print(f"Total unique items: {num_uids}")
print(f"Total stored items: {num_stored_photos}")
Expand Down
Loading

0 comments on commit 1dde211

Please sign in to comment.