Created new extractcode standalone command for aboutcode-org#52

pombredanne · Aug 10, 2015 · e02fd29 · e02fd29
1 parent b9339a4
commit e02fd29
Show file tree

Hide file tree

Showing 5 changed files with 366 additions and 0 deletions.
diff --git a/extractcode b/extractcode
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+#
+
+# A minimal shell wrapper to the CLI entry point.
+#
+# The wrapper changes to the directory holding this script, runs the
+# configure script first if bin/python is not there yet, then forwards all
+# command line arguments unchanged to bin/extractcode.
+# All path expansions are quoted so that the wrapper also works when it is
+# installed in a directory whose path contains spaces.
+
+SCANCODE_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$SCANCODE_ROOT_DIR"
+
+SCANCODE_CONFIGURED_PYTHON="$SCANCODE_ROOT_DIR/bin/python"
+if [ ! -f "$SCANCODE_CONFIGURED_PYTHON" ]; then
+    echo "* Configuring ScanCode ..."
+    CONFIGURE_QUIET=1 "$SCANCODE_ROOT_DIR/configure" etc/conf
+fi
+
+"$SCANCODE_ROOT_DIR/bin/extractcode" "$@"
diff --git a/extractcode.bat b/extractcode.bat
@@ -0,0 +1,35 @@
+@echo OFF
+@rem  Copyright (c) 2015 nexB Inc. http://www.nexb.com/ - All rights reserved.
+@rem  
+
+
+@rem  A minimal shell wrapper to the CLI entry point
+@rem  Paths are quoted so that the wrapper also works when it is installed
+@rem  in a directory whose path contains spaces.
+
+set SCANCODE_ROOT_DIR=%~dp0
+@rem  Use /d so that the current drive is switched too when needed
+cd /d "%SCANCODE_ROOT_DIR%"
+
+set SCANCODE_CMD_LINE_ARGS= 
+set SCANCODE_CONFIGURED_PYTHON=%SCANCODE_ROOT_DIR%\bin\python.exe
+
+@rem Collect all command line arguments in a variable
+:collectarg
+ if ""%1""=="""" goto continue
+ call set SCANCODE_CMD_LINE_ARGS=%SCANCODE_CMD_LINE_ARGS% %1
+ shift
+ goto collectarg
+
+:continue
+
+
+@rem Run the configure script first if the configured Python is missing
+if not exist "%SCANCODE_CONFIGURED_PYTHON%" goto configure
+goto scancode
+
+:configure
+ echo * Configuring ScanCode ...
+ set CONFIGURE_QUIET=1
+ call "%SCANCODE_ROOT_DIR%\configure" etc/conf
+
+:scancode
+"%SCANCODE_ROOT_DIR%\bin\extractcode" %SCANCODE_CMD_LINE_ARGS%
+
+:EOS
diff --git a/setup.py b/setup.py
@@ -115,6 +115,7 @@ def read(*names, **kwargs):
     entry_points={
         'console_scripts': [
             'scancode = scancode.cli:scancode',
+            'extractcode = scancode.extract_cli:extractcode',
         ],
     },
 )
diff --git a/src/scancode/extract_cli.py b/src/scancode/extract_cli.py
@@ -0,0 +1,176 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+#  Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+#  OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+#  ScanCode should be considered or used as legal advice. Consult an Attorney
+#  for any legal advice.
+#  ScanCode is a free software code scanning tool from nexB Inc. and others.
+#  Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import print_function, absolute_import
+
+import os
+
+import click
+
+from scancode.cli import version
+from scancode.cli import print_about
+
+from scancode.api import extract_archives
+from scancode.utils import BaseCommand
+from commoncode import fileutils
+from scancode import utils
+from click.termui import style
+from commoncode.fileutils import as_posixpath
+
+
+def print_version(ctx, param, value):
+    if not value or ctx.resilient_parsing:
+        return
+    click.secho('ScanCode extractcode version ' + version)
+    ctx.exit()
+
+
+# Usage examples passed as the `epilog` of the extractcode command below.
+# NOTE(review): the '\b' markers are presumably click's escape to keep the
+# following paragraph from being re-wrapped in the help output - confirm
+# against the click documentation.
+epilog_text = '''\b\bExamples:
+
+(Note for Windows: use '\\' backslash instead of '/' slash for paths.)
+
+\b
+Extract all archives found in the 'samples' directory tree:
+
+    extractcode samples
+
+Note: If an archive contains other archives, all contained archives will be
+extracted recursively. Extraction is done directly in the 'samples' directory,
+side-by-side with each archive. Files are extracted in a directory named after
+the archive with an '-extract' suffix added to its name, created side-by-side
+with the corresponding archive file.
+
+\b
+Extract a single archive. Files are extracted in the directory
+'samples/arch/zlib.tar.gz-extract/':
+
+    extractcode samples/arch/zlib.tar.gz
+'''
+
+
+class ExtractCommand(BaseCommand):
+    short_usage_help = '''
+Try 'extractcode --help' for help on options and arguments.'''
+
+
+# Command line entry point for archive extraction.
+# The function docstring below doubles as the command help text: keep it
+# user-oriented.
+@click.command(name='extractcode', epilog=epilog_text, cls=ExtractCommand)
+@click.pass_context
+# <input> must be an existing and readable path
+@click.argument('input', metavar='<input>',
+type=click.Path(exists=True, readable=True)
+)
+
+@click.option(
+    '--verbose', is_flag=True, default=False,
+    help='Print verbose file-by-file progress messages.'
+)
+
+@click.help_option('-h', '--help')
+
+# --about and --version are eager flags handled entirely by their callbacks
+@click.option('--about',
+    is_flag=True, is_eager=True, callback=print_about,
+    help='Show information about ScanCode and licensing and exit.'
+)
+
+@click.option('--version',
+    is_flag=True, is_eager=True, callback=print_version,
+    help='Show the version and exit.'
+)
+
+# NOTE(review): *args and **kwargs presumably absorb the values of the
+# --about and --version flags that are not named in the signature - confirm.
+def extractcode(ctx, input, verbose, *args, **kwargs):  # @ReservedAssignment
+    """extract archives and compressed files found in the <input> file or directory tree.
+
+    Use this command before scanning proper, as an <input> preparation step.
+    Archives found inside an extracted archive are extracted recursively.
+    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
+    """
+
+    # Work on an absolute, user-expanded path converted with as_posixpath
+    abs_input = as_posixpath(os.path.abspath(os.path.expanduser(input)))
+    # The return code of the extraction becomes the exit code of the command
+    rc = extract_with_progress(abs_input, verbose)
+    ctx.exit(rc)
+
+
+def extract_with_progress(input, verbose=False):  # @ReservedAssignment
+    """
+    Extract archives and display progress.
+    """
+    # note: we use inner functions so they can close on local variables
+
+    def extract_start():
+        return style('Extracting archives...', fg='green')
+
+    def extract_event(item):
+        """
+        Display an extract event.
+        """
+        if not item:
+            return ''
+        if verbose:
+            if item.done:
+                return ''
+            line = item.source or ''
+        else:
+            line = fileutils.file_name(item.source) or ''
+        return 'Extracting: %(line)s' % locals()
+
+
+    def extract_end():
+        """
+        Display a summary of warnings and errors if any.
+        """
+        has_warnings = False
+        has_errors = False
+        summary = []
+        for xev in extract_results:
+            has_errors = has_errors or bool(xev.errors)
+            has_warnings = has_warnings or bool(xev.warnings)
+            source = xev.source
+            for e in xev.errors:
+                summary.append(style('ERROR extracting: %(source)s: %(e)r' % locals(), fg='red', reset=False))
+            for warn in xev.warnings:
+                summary.append(style('WARNING extracting: %(source)s: %(warn)r' % locals(), fg='yellow', reset=False))
+
+        summary_color = 'green'
+        if has_warnings:
+            summary_color = 'yellow'
+        if has_errors:
+            summary_color = 'red'
+
+        summary.append(style('Extracting done.', fg=summary_color, reset=True))
+        return '\n'.join(summary)
+
+
+    extract_results = []
+    has_extract_errors = False
+
+    with utils.progressmanager(extract_archives(input),
+                               item_show_func=extract_event,
+                               start_show_func=extract_start,
+                               finish_show_func=extract_end,
+                               verbose=verbose,
+                               ) as extraction_events:
+        for xev in extraction_events:
+            if xev.done and (xev.warnings or xev.errors):
+                has_extract_errors = has_extract_errors or xev.errors
+                extract_results.append(xev)
+    return 1 if has_extract_errors else 0
diff --git a/tests/scancode/test_extract_cli.py b/tests/scancode/test_extract_cli.py
@@ -0,0 +1,137 @@
+#
+# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode-toolkit/
+# The ScanCode software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode require an acknowledgment.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with ScanCode or any ScanCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+#  Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+#  OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+#  ScanCode should be considered or used as legal advice. Consult an Attorney
+#  for any legal advice.
+#  ScanCode is a free software code scanning tool from nexB Inc. and others.
+#  Visit https://github.com/nexB/scancode-toolkit/ for support and download.
+
+from __future__ import absolute_import, print_function
+
+import os
+
+import click
+from click.testing import CliRunner
+
+from commoncode.fileutils import as_posixpath
+
+from scancode import extract_cli
+from commoncode.testcase import FileDrivenTesting
+
+# Shared test helper used by the tests below to get temporary directories
+# and test file locations. Its test data directory is the 'data' directory
+# side-by-side with this module.
+test_env = FileDrivenTesting()
+test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
+
+
+"""
+These CLI tests are dependent on py.test monkeypatch to  ensure we are testing
+the actual command outputs as if using a TTY or not.
+"""
+
+
+def test_extractcode_command_can_take_an_empty_directory(monkeypatch):
+    test_dir = test_env.get_temp_dir()
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, [test_dir])
+    assert result.exit_code == 0
+    assert 'Extracting archives...' in result.output
+    assert 'Extracting done' in result.output
+
+
+def test_extractcode_command_does_extract_verbose(monkeypatch):
+    test_dir = test_env.get_test_loc('extract', copy=True)
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+    assert result.exit_code == 1
+    assert os.path.exists(os.path.join(test_dir, 'some.tar.gz-extract'))
+    expected = [
+        'Extracting archives...',
+        '/some.tar.gz',
+        '/broken.tar.gz',
+        '/tarred_gzipped.tgz',
+        'ERROR extracting',
+        "/broken.tar.gz: 'Unrecognized archive format'",
+        'Extracting done.',
+    ]
+    for e in expected:
+        assert e in result.output
+
+
+def test_extractcode_command_does_no_show_anything_if_not_using_a_tty(monkeypatch):
+    test_dir = test_env.get_test_loc('extract/some.tar.gz', copy=True)
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: False)
+    runner = CliRunner()
+    result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir])
+    assert '' == result.output
+    result = runner.invoke(extract_cli.extractcode, [test_dir])
+    assert '' == result.output
+
+
+def test_extractcode_command_works_with_relative_paths(monkeypatch):
+    # The setup is a tad complex because we want to have a relative dir
+    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
+    # To use relative paths, we use our tmp dir at the root of the code tree
+    from os.path import dirname, join, abspath
+    from  commoncode import fileutils
+    import extractcode
+    import tempfile
+    import shutil
+
+    try:
+        scancode_root = dirname(dirname(dirname(__file__)))
+        scancode_tmp = join(scancode_root, 'tmp')
+        fileutils.create_dir(scancode_tmp)
+        scancode_root_abs = abspath(scancode_root)
+        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
+        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
+        shutil.copy(test_file, test_src_dir)
+        test_src_file = join(test_src_dir, 'basic.zip')
+        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
+
+        runner = CliRunner()
+        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+        result = runner.invoke(extract_cli.extractcode, [test_src_file])
+        assert result.exit_code == 0
+        assert 'Extracting done' in result.output
+        assert not 'WARNING' in result.output
+        assert not 'ERROR' in result.output
+        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
+        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
+        assert sorted(expected) == sorted(file_result)
+    finally:
+        fileutils.delete(test_src_dir)
+
+
+def test_usage_and_help_return_a_correct_script_name_on_all_platforms(monkeypatch):
+    runner = CliRunner()
+    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
+    result = runner.invoke(extract_cli.extractcode, ['--help'])
+    assert 'Usage: extractcode [OPTIONS]' in result.output
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output
+
+    result = runner.invoke(extract_cli.extractcode, [])
+    assert 'Usage: extractcode [OPTIONS]' in result.output
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output
+
+    result = runner.invoke(extract_cli.extractcode, ['-xyz'])
+    # this was showing up on Windows
+    assert 'extractcode-script.py' not in result.output