Merge latest develop in 1728-package-manifests #1237

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
aboutcode-org · Oct 2, 2019 · 8a261dd · 8a261dd
2 parents 81db9c6 + 2529cd7
commit 8a261dd
Show file tree

Hide file tree

Showing 764 changed files with 34,747 additions and 871 deletions.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -20,29 +20,38 @@ jobs:
                 bin/codecov --token "$CODECOV_TOKEN"
 
               scancode: |
-                bin/py.test --reruns=3 -vvs --test-suite=all tests/scancode \
+                bin/py.test --reruns=3 -vvs --test-suite=all \
+                  tests/scancode \
                   --cov=src --cov-report=term --cov-report=xml
                 bin/codecov --token "$CODECOV_TOKEN"
 
               cluecode: |
-                bin/py.test -n 2 --reruns=3 -vvs --test-suite=all tests/cluecode \
+                bin/py.test -n 2 --reruns=3 -vvs --test-suite=all \
+                  tests/cluecode \
                   --cov=src --cov-report=term --cov-report=xml
                 bin/codecov --token "$CODECOV_TOKEN"
 
               license_base: |
                 bin/py.test -n 2 --reruns=3 -vvs --test-suite=all \
                   --ignore=tests/licensedcode/test_zzzz_cache.py \
                   --ignore=tests/licensedcode/test_detection_datadriven1.py \
+                  --ignore=tests/licensedcode/test_detection_datadriven2.py \
                   tests/licensedcode \
                   --cov=src --cov-report=term --cov-report=xml
                 bin/codecov --token "$CODECOV_TOKEN"
 
-              license_main: |
+              license_datadriven1: |
                 bin/py.test -n 2 --reruns=3 -vvs --test-suite=all \
                   tests/licensedcode/test_detection_datadriven1.py \
                   --cov=src --cov-report=term --cov-report=xml
                 bin/codecov --token "$CODECOV_TOKEN"
 
+              license_datadriven2: |
+                bin/py.test -n 2 --reruns=3 -vvs --test-suite=all \
+                  tests/licensedcode/test_detection_datadriven2.py \
+                  --cov=src --cov-report=term --cov-report=xml
+                bin/codecov --token "$CODECOV_TOKEN"
+
               license_cache: |
                 bin/py.test -n 2 --reruns=3 -vvs --test-suite=all \
                   tests/licensedcode/test_zzzz_cache.py \
@@ -60,7 +69,6 @@ jobs:
               scancode: Scripts\py.test -vvs --reruns=3 tests\scancode
               lic_cluecode: Scripts\py.test -vvs  --reruns=3 tests\cluecode  tests\licensedcode
 
-
     - template: etc/ci/azure-mac.yml
       parameters:
           job_name: macos1013_py27

diff --git a/etc/scripts/buildrules.py b/etc/scripts/buildrules.py
@@ -30,6 +30,7 @@
 import io
 import os
 
+import attr
 import click
 click.disable_unicode_literals_warning = True
 from license_expression import Licensing
@@ -40,6 +41,27 @@
 from licensedcode import match_hash
 
 
+@attr.attrs(slots=True)
+class LicenseRule(object):
+    """A new license rule built from lists of YAML data lines and text lines."""
+    data = attr.ib()
+    text = attr.ib()
+    raw_data = attr.ib(default=None)
+
+    def __attrs_post_init__(self, *args, **kwargs):
+        self.raw_data = rdat = '\n'.join(self.data).strip()
+        self.text = '\n'.join(self.text).strip()
+        # validate YAML syntax: dump the offending data before re-raising
+        try:
+            self.data = saneyaml.load(rdat)
+        except Exception:
+            print('########################################################')
+            print('Invalid YAML:')
+            print(rdat)
+            print('########################################################')
+            raise
+
+
 def load_data(location='00-new-licenses.txt'):
     with io.open(location, encoding='utf-8') as o:
         lines = o.read().splitlines(False)
@@ -50,22 +72,26 @@ def load_data(location='00-new-licenses.txt'):
     text = []
     in_data = False
     in_text = False
-    for ln, line in enumerate(lines):
+    last_lines = []
+    for ln, line in enumerate(lines, 1):
+        last_lines.append(': '.join([str(ln), line]))
         if line == '----------------------------------------':
-            if not (ln == 0 or in_text):
-                raise Exception('Invalid structure: #{ln}: {line}'.format(**locals()))
+            if not (ln == 1 or in_text):
+                raise Exception('Invalid structure: #{ln}: {line}\n'.format(**locals()) +
+                                '\n'.join(last_lines[-10:]))
 
             in_data = True
             in_text = True
             if data and ''.join(text).strip():
-                rules.append((data, text))
+                rules.append(LicenseRule(data, text))
             data = []
             text = []
             continue
 
         if line == '---':
             if not in_data:
-                raise Exception('Invalid structure: #{ln}: {line}'.format(**locals()))
+                raise Exception('Invalid structure: #{ln}: {line}\n'.format(**locals()) +
+                                '\n'.join(last_lines[-10:]))
 
             in_data = False
             in_text = True
@@ -84,12 +110,15 @@ def load_data(location='00-new-licenses.txt'):
 
 def rule_exists(text):
     """
-    Return the matched rule if the text is an existing rule matched exactly, False otherwise.
+    Return the matched rule identifier if the text is an existing rule matched
+    exactly, False otherwise.
     """
     idx = cache.get_index()
 
     matches = idx.match(query_string=text)
-    if len(matches) != 1:
+    if not matches:
+        return False
+    if len(matches) > 1:
         return False
     match = matches[0]
     if match.matcher == match_hash.MATCH_HASH:
@@ -101,7 +130,13 @@ def find_rule_base_loc(license_expression):
     Return a new, unique and non-existing base name location suitable to create a new
     rule.
     """
-    template = license_expression.lower().strip().replace(' ', '_').replace('(', '').replace(')', '') + '_{}'
+    template = (license_expression
+        .lower()
+        .strip()
+        .replace(' ', '_')
+        .replace('(', '')
+        .replace(')', '')
+        +'_{}')
     idx = 1
     while True:
         base_name = template.format(idx)
@@ -130,48 +165,45 @@ def cli(licenses_file):
         ----------------------------------------
     """
 
-    rule_data = load_data(licenses_file)
+    rules_data = load_data(licenses_file)
     rules_tokens = set()
 
     licenses = cache.get_licenses_db()
     licensing = Licensing(licenses.values())
 
     print()
-    for data, text in rule_data:
-        rdat = '\n'.join(data)
-        rtxt = '\n'.join(text)
-        existing = rule_exists(rtxt)
+    for rule in rules_data:
+        existing = rule_exists(rule.text)
         if existing:
-            print('Skipping existing rule:', existing, 'with text:\n', rtxt[:50].strip(), '...')
+            print('Skipping existing rule:', existing, 'with text:\n', rule.text[:50].strip(), '...')
             continue
 
-        # validate YAML syntax
-        parsed = saneyaml.load(rdat)
-        if parsed.get('is_negative'):
-            license_expression = 'not-a-license'
+        if rule.data.get('is_negative'):
+            base_name = 'not-a-license'
         else:
-            _, _, license_expression = data[0].partition(': ')
-            license_expression = license_expression.strip()
+            license_expression = rule.data.get('license_expression')
             if not license_expression:
-                raise Exception('Missing license_expression for text:', rtxt)
+                raise Exception('Missing license_expression for text:', rule)
             licensing.parse(license_expression, validate=True, simple=True)
+            base_name = license_expression
 
-        base_loc = find_rule_base_loc(license_expression)
+        base_loc = find_rule_base_loc(base_name)
 
         data_file = base_loc + '.yml'
         with io.open(data_file, 'w', encoding='utf-8') as o:
-            o.write(rdat)
+            o.write(rule.raw_data)
 
         text_file = base_loc + '.RULE'
         with io.open(text_file, 'w', encoding='utf-8') as o:
-            o.write(rtxt)
+            o.write(rule.text)
+
         rule = models.Rule(data_file=data_file, text_file=text_file)
         rule_tokens = tuple(rule.tokens())
         if rule_tokens in rules_tokens:
             # cleanup
             os.remove(text_file)
             os.remove(data_file)
-            print('Skipping already added rule with text for:', license_expression)
+            print('Skipping already added rule with text for:', base_name)
         else:
             rules_tokens.add(rule_tokens)
             rule.dump()

diff --git a/plugins-builtin/extractcode-7z-system_provided/MANIFEST.in b/plugins-builtin/extractcode-7z-system_provided/MANIFEST.in
@@ -3,6 +3,7 @@ graft src
 include setup.py
 include setup.cfg
 include .gitignore
+include LICENSE.txt
 include README.md
 include MANIFEST.in
 

diff --git a/plugins-builtin/extractcode-7z-system_provided/setup.py b/plugins-builtin/extractcode-7z-system_provided/setup.py
@@ -16,7 +16,7 @@
 desc = '''A ScanCode path provider plugin to provide system package provided sevenzip binary.'''
 
 setup(
-    name='extractcode-7z',
+    name='extractcode-7z-system-provided',
     version='9.38.2',
     license='lgpl-2.1 and unrar and brian-gladman-3-clause',
     description=desc,

diff --git a/plugins-builtin/extractcode-7z-system_provided/src/extractcode_7z/__init__.py b/plugins-builtin/extractcode-7z-system_provided/src/extractcode_7z/__init__.py
@@ -26,41 +26,39 @@
 from __future__ import unicode_literals
 
 import platform
-from os.path import abspath
-from os.path import dirname
-from os.path import join
+from os import path
 
 from plugincode.location_provider import LocationProviderPlugin
-from plugincode.location_provider import location_provider_impl
 
 
 class SevenzipPaths(LocationProviderPlugin):
-
-def get_locations(self):
 
-        curr_dir = dirname(abspath(__file__))
-        distribution=platform.linux_distribution()[0]	
-
-	# List of various major distributions consisting of flavors
+    def get_locations(self):
+        """
+        Return a mapping of {location key: location} providing the installation
+        locations of the 7zip exe and shared libraries as installed on various
+        Linux distros or on FreeBSD. Raise an Exception on any other system.
+        """
+        mainstream_system = platform.system().lower()
+        if mainstream_system == 'linux':
+            distribution = platform.linux_distribution()[0].lower()  # NOTE: removed in Python 3.8
 
-	debian_based_distro=['Ubuntu','Mint','debian']
-
-	rpm_based_distro=['Fedora','redhat']
+            if distribution in ('ubuntu', 'mint', 'debian'):  # Debian-based
+                lib_dir = '/usr/lib/p7zip'
 
-	if distribution in debian_based_distro:
-
-		lib_dir = '/usr/lib/p7zip'
-
-	elif distribution in rpm_based_distro:
-
-		lib_dir = '/usr/libexec/p7zip'
+            elif distribution in ('fedora', 'redhat'):  # RPM-based
+                lib_dir = '/usr/libexec/p7zip'
 
-	else
-		lib_dir = '/usr'
+            else:
+                raise Exception('Unsupported system: {}'.format(distribution))
+        elif mainstream_system == 'freebsd':
+            lib_dir = '/usr/local/libexec/p7zip'
+        else:  # lib_dir would otherwise be unbound when building locations
+            raise Exception('Unsupported system: {}'.format(mainstream_system))
 
         locations = {
             'extractcode.sevenzip.libdir': lib_dir,
-            'extractcode.sevenzip.exe': join(lib_dir, '7z'),
+            'extractcode.sevenzip.exe': path.join(lib_dir, '7z'),
         }
 
         return locations
diff --git a/...ins-builtin/extractcode-libarchive-system_provided/src/extractcode_libarchive/__init__.py b/...ins-builtin/extractcode-libarchive-system_provided/src/extractcode_libarchive/__init__.py
@@ -25,39 +25,43 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
-from os.path import abspath
-from os.path import dirname
-from os.path import join
+from os import path
+import platform
 
 from plugincode.location_provider import LocationProviderPlugin
-from plugincode.location_provider import location_provider_impl
 
-import platform
 
 class LibarchivePaths(LocationProviderPlugin):
-	def get_locations(self):
-		curr_dir = dirname(abspath(__file__))
-		distribution=platform.linux_distribution()[0]	
-
-		# List of various major distributions consisting of flavors
-		debian_based_distro=['Ubuntu','Mint','debian']
-
-		rpm_based_distro=['Fedora','redhat']
-
-		if distribution in debian_based_distro:
-
-			lib_dir = '/usr/lib/x86_64-linux-gnu'
-
-		elif distribution in rpm_based_distro:
-
-			lib_dir = '/usr/lib64'
-
-		else:
-			#User defined if installation directory differs
-			lib_dir = '/usr'
-
-		locations = {
-			'extractcode.libarchive.libdir': lib_dir,
-			'extractcode.libarchive.dll': join(lib_dir, 'libarchive.so.13'),
-		}
-		return locations
+    def get_locations(self):
+        """
+        Return a mapping of {location key: location} providing the installation
+        locations of the libarchive shared library as installed on various Linux
+        distros or on FreeBSD. Raise an Exception on any other system.
+        """
+        system_arch = platform.machine()
+        mainstream_system = platform.system().lower()
+        if mainstream_system == 'linux':
+            # NOTE: platform.linux_distribution() was removed in Python 3.8
+            distribution = platform.linux_distribution()[0].lower()
+
+            if distribution in ('ubuntu', 'mint', 'debian'):
+                # Debian-based: arch-specific dir such as /usr/lib/x86_64-linux-gnu
+                lib_dir = '/usr/lib/'+system_arch+'-linux-gnu'
+            elif distribution in ('fedora', 'redhat'):
+                # RPM-based
+                lib_dir = '/usr/lib64'
+            else:
+                raise Exception('Unsupported system: {}'.format(distribution))
+        elif mainstream_system == 'freebsd':
+            if path.isdir('/usr/local/'):
+                lib_dir = '/usr/local/lib'
+            else:
+                lib_dir = '/usr/lib'
+        else:
+            # fail clearly: lib_dir would otherwise be unbound just below
+            raise Exception('Unsupported system: {}'.format(mainstream_system))
+
+        locations = {
+            'extractcode.libarchive.libdir': lib_dir,
+            'extractcode.libarchive.dll': path.join(lib_dir, 'libarchive.so'),
+        }
+        return locations