Skip to content

Commit

Permalink
Add integrity check before index/import
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronkollasch committed Aug 20, 2022
1 parent 30dadc3 commit 4be56e2
Show file tree
Hide file tree
Showing 18 changed files with 142 additions and 7 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ jobs:

steps:
- uses: "actions/checkout@v2"
with:
submodules: 'true'
- uses: "actions/setup-python@v2"
with:
python-version: "${{ matrix.python-version }}"
- name: Install exiftool
run: sudo apt-get install libimage-exiftool-perl
- name: Install exiftool and ffmpeg
run: sudo apt-get install libimage-exiftool-perl ffmpeg
- name: "Install b3sum"
run: |
sudo wget https://github.com/BLAKE3-team/BLAKE3/releases/download/1.2.0/b3sum_linux_x64_bin -O /usr/local/bin/b3sum
Expand Down Expand Up @@ -67,6 +69,7 @@ jobs:
with:
# pulls all commits (needed for lerna / semantic release to correctly version)
fetch-depth: "0"
submodules: 'true'
- uses: "actions/setup-python@v2"
with:
python-version: "3.9"
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "check-media-integrity"]
path = src/photomanager/check_media_integrity
url = https://github.com/aaronkollasch/check-media-integrity/
9 changes: 8 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ extend-exclude = '''
# A regex preceded with ^/ will apply only to files and directories
# in the root of the project.
# ^/foo.py # exclude a file named foo.py in the root of the project (in addition to the defaults)
^/src/photomanager/_version.py
(
^/src/photomanager/_version[.]py
| ^/src/photomanager/check_media_integrity/
)
'''

[tool.isort]
profile = "black"
skip_gitignore = true
skip_glob = ["src/photomanager/check_media_integrity/*"]
line_length = 88

[tool.pytest.ini_options]
Expand All @@ -36,3 +40,6 @@ testpaths = [
markers = [
"datafiles",
]
filterwarnings = [
"ignore:.*the imp module is deprecated.*:DeprecationWarning",
]
20 changes: 19 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ install_requires =
zstandard>=0.15.2
xxhash>=2.0.2
blake3~=0.3.0
ffmpeg-python
Pillow-SIMD ; platform_machine=="i386"
Pillow-SIMD ; platform_machine=="x86"
Pillow-SIMD ; platform_machine=="x64"
Pillow-SIMD ; platform_machine=="x86_64"
Pillow ; platform_machine=="arm"
Pillow ; platform_machine=="armv7l"
Pillow ; platform_machine=="aarch64"
Pillow ; platform_machine=="arm64"
PyPDF2
Wand
filetype
pyheif
psutil
python_requires = >=3.8
package_dir =
= src
Expand Down Expand Up @@ -75,6 +89,9 @@ deps =
pytest
pytest-datafiles
coverage
conda_deps =
imagemagick
ffmpeg
;install_command = pip install --no-compile {opts} {packages}
commands =
coverage run -m pytest
Expand All @@ -88,7 +105,7 @@ deps =
commands =
black --check --diff .
isort --check --diff .
flake8 --count src tests benchmarks
flake8 --count src tests benchmarks --extend-exclude 'src/photomanager/check_media_integrity'

[testenv:twine]
deps =
Expand Down Expand Up @@ -137,6 +154,7 @@ show_missing = true
skip_covered = true
omit =
src/photomanager/actions/migrate.py
src/photomanager/check_media_integrity/*
exclude_lines =
pragma: no cover
if TYPE_CHECKING:
Expand Down
1 change: 1 addition & 0 deletions src/photomanager/check_media_integrity
Submodule check_media_integrity added at f1a22c
24 changes: 22 additions & 2 deletions src/photomanager/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
import json
import logging
import sys
from os import PathLike
from os import PathLike, cpu_count
from typing import Iterable, Optional, Union

import click

from photomanager import version
from photomanager.actions import actions, fileops
from photomanager.check_media_integrity.check_mi import check_files
from photomanager.database import Database, sizeof_fmt
from photomanager.hasher import DEFAULT_HASH_ALGO, HASH_ALGORITHMS, HashAlgorithm

Expand Down Expand Up @@ -81,6 +82,8 @@ def _create(
help="Name patterns to exclude")
@click.option("--skip-existing", default=False, is_flag=True,
help="Don't index files that are already in the database")
@click.option("--check-integrity", default=False, is_flag=True,
help="Check media integrity and don't index bad files")
@click.option("--priority", type=int, default=10,
help="Priority of indexed photos (lower is preferred, default=10)")
@click.option("--timezone-default", type=str, default=None,
Expand Down Expand Up @@ -108,6 +111,7 @@ def _index(
paths: Iterable[Union[str, PathLike]] = tuple(),
exclude: Iterable[str] = tuple(),
skip_existing: bool = False,
check_integrity: bool = False,
debug: bool = False,
dry_run: bool = False,
dump: bool = False,
Expand Down Expand Up @@ -135,6 +139,11 @@ def _index(
exclude_files=skip_existing,
paths=paths,
)
bad_files = None
if check_integrity:
bad_files = check_files(filtered_files, timeout=None, threads=cpu_count())
for filename in bad_files:
del filtered_files[filename]
index_result = actions.index(
database=database,
files=filtered_files,
Expand All @@ -150,7 +159,7 @@ def _index(
print(json.dumps(result, indent=2))
if db is not None and not dry_run:
database.save(path=db, argv=sys.argv)
click_exit(1 if index_result["num_error_photos"] else 0)
click_exit(1 if bad_files or index_result["num_error_photos"] else 0)


# fmt: off
Expand Down Expand Up @@ -205,6 +214,8 @@ def _collect(
help="Name patterns to exclude")
@click.option("--skip-existing", default=False, is_flag=True,
help="Don't index files that are already in the database")
@click.option("--check-integrity", default=False, is_flag=True,
help="Check media integrity and don't index bad files")
@click.option("--priority", type=int, default=10,
help="Priority of indexed photos (lower is preferred, default=10)")
@click.option("--timezone-default", type=str, default=None,
Expand All @@ -228,6 +239,7 @@ def _import(
paths: Iterable[Union[str, PathLike]] = tuple(),
exclude: Iterable[str] = tuple(),
skip_existing: bool = False,
check_integrity: bool = False,
debug: bool = False,
dry_run: bool = False,
priority: int = 10,
Expand All @@ -245,6 +257,13 @@ def _import(
exclude_files=skip_existing,
paths=paths,
)
bad_files = None
if check_integrity:
bad_files = check_files(
filtered_files, error_detect="strict", timeout=None, threads=cpu_count()
)
for filename in bad_files:
del filtered_files[filename]
index_result = actions.index(
database=database,
files=filtered_files,
Expand All @@ -267,6 +286,7 @@ def _import(
if index_result["num_error_photos"]
or collect_result["num_missed_photos"]
or collect_result["num_error_photos"]
or bad_files
else 0
)

Expand Down
71 changes: 71 additions & 0 deletions tests/integ_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,38 @@ def test_cli_index_dump_no_database(datafiles, caplog):
check_dir_empty(fs)


@pytest.mark.datafiles(FIXTURE_DIR / "D", keep_top_dir=True)
def test_cli_index_check_integrity(datafiles, caplog):
    """
    index --check-integrity exits with status 1 and names the rejected files

    The run over fixture directory D is expected to index exactly three items,
    while every file name listed below must show up in the command output.
    """
    caplog.set_level(logging.DEBUG)
    # File names the test requires to be mentioned in the CLI output.
    names_expected_in_output = (
        "D/fake_movie_text.mp4",
        "D/fake_movie_magic.mp4",
        "D/img1_trunc.jpg",
        "D/img1_trunc.png",
        "D/img1_trunc.mp4",
        "D/empty.nef",
        "D/wrong_type.jpg",
    )
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem(temp_dir=datafiles) as fs:
        cli_args = [
            "index",
            "--check-integrity",
            "--debug",
            "--dump",
            str(datafiles / "D"),
        ]
        outcome = cli_runner.invoke(cast(Group, cli.main), cli_args)
        print("\nINDEX D")
        print(outcome.output)
        print(outcome)
        # A non-zero status is required even though the remaining files index.
        assert outcome.exit_code == 1
        assert "Indexed 3/3 items" in caplog.messages
        assert "Added 3 new items and merged 0 items" in caplog.messages
        for reported_name in names_expected_in_output:
            assert reported_name in outcome.output
        tree_listing = "\n".join(str(p) for p in Path(datafiles).glob("**/*"))
        print(tree_listing)
        check_dir_empty(fs)


@pytest.mark.datafiles(FIXTURE_DIR / "C", keep_top_dir=True)
def test_cli_import_skip_existing(datafiles, caplog):
"""
Expand Down Expand Up @@ -750,6 +782,45 @@ def test_cli_import_skip_existing(datafiles, caplog):
check_dir_empty(fs)


@pytest.mark.datafiles(FIXTURE_DIR / "D", keep_top_dir=True)
def test_cli_import_check_integrity(datafiles, caplog):
    """
    import --check-integrity exits with status 1 and names the rejected files

    The run over fixture directory D is expected to index and copy exactly
    three items into the destination, while every file name listed below must
    show up in the command output.
    """
    caplog.set_level(logging.DEBUG)
    # File names the test requires to be mentioned in the CLI output.
    names_expected_in_output = (
        "D/fake_movie_text.mp4",
        "D/fake_movie_magic.mp4",
        "D/img1_trunc.jpg",
        "D/img1_trunc.png",
        "D/img1_trunc.mp4",
        "D/empty.nef",
        "D/wrong_type.jpg",
    )
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem(temp_dir=datafiles) as fs:
        # The destination directory is created before the command runs.
        os.makedirs(datafiles / "dest")
        cli_args = [
            "import",
            "--check-integrity",
            "--db",
            str(datafiles / "test.json"),
            "--destination",
            str(datafiles / "dest"),
            "--priority",
            "10",
            "--debug",
            str(datafiles / "D"),
        ]
        outcome = cli_runner.invoke(cast(Group, cli.main), cli_args)
        print("\nIMPORT D")
        print(outcome.output)
        print(outcome)
        # A non-zero status is required even though the remaining files import.
        assert outcome.exit_code == 1
        assert "Indexed 3/3 items" in caplog.messages
        assert "Added 3 new items and merged 0 items" in caplog.messages
        assert any("Copied 3 items" in m for m in caplog.messages)
        for reported_name in names_expected_in_output:
            assert reported_name in outcome.output
        tree_listing = "\n".join(str(p) for p in Path(datafiles).glob("**/*"))
        print(tree_listing)
        check_dir_empty(fs)


@ALL_IMG_DIRS
def test_cli_verify(datafiles, caplog):
caplog.set_level(logging.DEBUG)
Expand Down
Empty file added tests/test_files/D/empty.nef
Empty file.
Binary file added tests/test_files/D/fake_movie_magic.mp4
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_files/D/fake_movie_text.mp4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
askldjsalkdjklsajdklsajdklsajdlksjadlkjsakljdlksajklsjadkljaslkdjlskajdlksajdkljsalkdjslad,cx.,xcx9c89x8c9x898cx098x908
Binary file added tests/test_files/D/img1.mp4
Binary file not shown.
Binary file added tests/test_files/D/img1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/test_files/D/img1_trunc.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/test_files/D/img1_trunc.mp4
Binary file not shown.
Binary file added tests/test_files/D/img1_trunc.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/test_files/D/img4.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/test_files/D/wrong_type.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 12 additions & 1 deletion tests/unit_tests/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def test_list_files_paths_exclude(self, datafiles, caplog):
The list_files exclude argument removes filenames matching the patterns
"""
caplog.set_level(logging.DEBUG)
files = fileops.list_files(paths=[str(datafiles)], exclude=["img1", ".tiff"])
files = fileops.list_files(
paths=[str(datafiles)], exclude=["img1", ".tiff", "D"]
)
print(files)
assert set(files.keys()) == {
str(datafiles / "A" / "img2.jpg"),
Expand Down Expand Up @@ -147,6 +149,15 @@ def test_list_files_non_file(self, tmpdir, caplog):
assert next(iter(files)) == str(tmpdir / "not_a_file.jpg" / "test2.jpg")
assert any("not a file" in m for m in caplog.messages)

def test_index_photos_empty_list(self, caplog):
    """
    async index_photos does not error if no files are given

    Calling it with an empty file list must yield a result of length zero.
    """
    caplog.set_level(logging.DEBUG)
    no_files = []
    indexed = fileops.index_photos(files=no_files, storage_type="SSD")
    print(indexed)
    assert len(indexed) == 0

@pytest.mark.datafiles(
FIXTURE_DIR / "B",
keep_top_dir=True,
Expand Down

0 comments on commit 4be56e2

Please sign in to comment.