Skip to content

Commit

Permalink
Merge pull request #2011 from nexB/1996-extractcode-progress
Browse files Browse the repository at this point in the history
Restore progress reporting to extractcode #1996
  • Loading branch information
pombredanne authored Apr 21, 2020
2 parents c29bc6a + b3605b2 commit 794c7f1
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 12 deletions.
29 changes: 23 additions & 6 deletions src/extractcode/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False, replace_or
is True.
If `recurse` is True, extract recursively archives nested inside other
archives If `recurse` is false, then do not extract further an already
archives. If `recurse` is false, then do not extract further an already
extracted archive identified by the corresponding extract suffix location.
If `replace_originals` is True, the extracted archives are replaced by the
Expand All @@ -123,9 +123,16 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False, replace_or
if recurse and a nested archive is found, it is extracted to full depth
first before resuming the file system walk.
"""
events = list(extract_files(location, kinds, recurse))
processed_events = []
processed_events_append = processed_events.append
for event in extract_files(location, kinds, recurse):
yield event
if replace_originals:
processed_events_append(event)

# move files around
if replace_originals:
for xevent in reversed(events):
for xevent in reversed(processed_events):
if xevent.done:
source = xevent.source
target = xevent.target
Expand All @@ -134,12 +141,22 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False, replace_or
fileutils.delete(source)
fileutils.copytree(target, source)
fileutils.delete(target)
return events


def extract_files(location, kinds=extractcode.default_kinds, recurse=False):
"""
Extract the files found at `location`.
Extract only archives of a kind listed in the `kinds` kind tuple.
If `recurse` is True, extract recursively archives nested inside other
archives. If `recurse` is false, then do not extract further an already
extracted archive identified by the corresponding extract suffix location.
"""
ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
if TRACE:
logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

abs_location = abspath(expanduser(location))
for top, dirs, files in fileutils.walk(abs_location, ignored):
if TRACE:
Expand Down Expand Up @@ -192,8 +209,8 @@ def extract_file(location, target, kinds=extractcode.default_kinds, verbose=Fals
extractor = archive.get_extractor(location, kinds)
if TRACE:
logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
+ getattr(extractor, '__module__', '')
+ '.' + getattr(extractor, '__name__', ''))
+getattr(extractor, '__module__', '')
+'.' + getattr(extractor, '__name__', ''))
if extractor:
yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
try:
Expand Down
14 changes: 8 additions & 6 deletions src/scancode/extract_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def display_extract_summary():
has_warnings = False
has_errors = False
summary = []
for xev in extract_results:
for xev in extract_result_with_errors:
has_errors = has_errors or bool(xev.errors)
has_warnings = has_warnings or bool(xev.warnings)
source = fileutils.as_posixpath(xev.source)
Expand All @@ -156,9 +156,11 @@ def display_extract_summary():
len_base_path = len(abs_location)
base_is_dir = filetype.is_dir(abs_location)

extract_results = []
extract_result_with_errors = []
unique_extract_events_with_errors = set()
has_extract_errors = False
extractibles = extract_archives(abs_location, recurse=not shallow, replace_originals=replace_originals)
extractibles = extract_archives(
abs_location, recurse=not shallow, replace_originals=replace_originals)

if not quiet:
echo_stderr('Extracting archives...', fg='green')
Expand All @@ -168,14 +170,14 @@ def display_extract_summary():
for xev in extraction_events:
if xev.done and (xev.warnings or xev.errors):
has_extract_errors = has_extract_errors or xev.errors
extract_results.append(xev)

if repr(xev) not in unique_extract_events_with_errors:
extract_result_with_errors.append(xev)
unique_extract_events_with_errors.add(repr(xev))
display_extract_summary()
else:
for xev in extractibles:
if xev.done and (xev.warnings or xev.errors):
has_extract_errors = has_extract_errors or xev.errors
extract_results.append(xev)

rc = 1 if has_extract_errors else 0
ctx.exit(rc)
Expand Down
Binary file added tests/extractcode/data/extract/generator/c.zip
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/extractcode/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from commoncode.system import on_linux
from commoncode.system import on_windows
from commoncode.system import py3
from types import GeneratorType


class TestExtract(FileBasedTesting):
Expand Down Expand Up @@ -1111,3 +1112,9 @@ def test_extract_zipslip_tar_posix(self):

warns = [r.warnings for r in result if r.warnings]
assert [] == warns

def test_extract_always_returns_a_generator_and_not_a_list(self):
# Regression test for #1996: extract.extract() must hand back a generator
# (not a fully built list) so that callers can display progress
# "progressively", as each extract event is produced.
test_dir = self.get_test_loc('extract/generator', copy=True)
# The result is deliberately not iterated: only its type is checked here.
result = extract.extract(test_dir)
assert isinstance(result, GeneratorType)

0 comments on commit 794c7f1

Please sign in to comment.