Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/extractcode/extract.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@

from commoncode import fileutils
from commoncode import ignore
from commoncode import hash

import extractcode # NOQA
import extractcode.archive
Expand Down Expand Up @@ -86,6 +87,7 @@ def extract(
recurse=False,
replace_originals=False,
ignore_pattern=(),
known_archive_hashes=set()
):
"""
Walk and extract any archives found at ``location`` (either a file or
Expand Down Expand Up @@ -121,6 +123,7 @@ def extract(
kinds=kinds,
recurse=recurse,
ignore_pattern=ignore_pattern,
known_archive_hashes=known_archive_hashes
)

processed_events = []
Expand Down Expand Up @@ -151,6 +154,7 @@ def extract_files(
kinds=extractcode.default_kinds,
recurse=False,
ignore_pattern=(),
known_archive_hashes=set()
):
"""
Extract the files found at `location`.
Expand Down Expand Up @@ -190,7 +194,7 @@ def extract_files(
if not recurse and extractcode.is_extraction_path(loc):
if TRACE:
logger.debug(
- 'extract:walk not recurse: skipped file: %(loc)r' % locals())
+ 'extract:walk: not recurse: skipped file: %(loc)r' % locals())
continue

if not extractcode.archive.should_extract(
Expand All @@ -203,6 +207,14 @@ def extract_files(
'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
continue

file_hash = hash.sha256(loc)

if known_archive_hashes and file_hash in known_archive_hashes:
if TRACE:
logger.debug(
'extract:walk: skipped file: decompression bomb detected: %(loc)r' % locals())
continue

target = join(abspath(top), extractcode.get_extraction_path(loc))
if TRACE:
logger.debug('extract:target: %(target)r' % locals())
Expand All @@ -220,11 +232,16 @@ def extract_files(
if recurse:
if TRACE:
logger.debug('extract:walk: recursing on target: %(target)r' % locals())

kah = set(known_archive_hashes)
kah.add(file_hash)

for xevent in extract(
location=target,
kinds=kinds,
recurse=recurse,
ignore_pattern=ignore_pattern,
known_archive_hashes=kah
):
if TRACE:
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
Expand Down