diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py index 7d0187a0458..f5f1ed508b4 100644 --- a/src/cluecode/copyrights.py +++ b/src/cluecode/copyrights.py @@ -2517,7 +2517,8 @@ def build_detection_from_node( COMPANY: { } #19603 -####################################### +################################# #COPYRIGHT: { } #1802 +###### # VARIOUS FORMS OF COPYRIGHT ####################################### @@ -2572,6 +2573,8 @@ def build_detection_from_node( COPYRIGHT: { +} #1690 + COPYRIGHT: { } #1802 + COPYRIGHT: { + +} #1710 COPYRIGHT: { } #1711 @@ -4125,8 +4128,10 @@ def prepare_text_line(line, dedeb=True, to_ascii=True): # normalize (possibly repeated) quotes to unique single quote ' # backticks ` and " - .replace('`', u"'") - .replace('"', u"'") + .replace('`', "'") + .replace('"', "'") + # see https://github.com/nexB/scancode-toolkit/issues/3667 + .replace('§', " ") ) if TRACE_TOK: diff --git a/src/packagedcode/gemfile_lock.py b/src/packagedcode/gemfile_lock.py index 0639a475bc5..c65096c1e75 100644 --- a/src/packagedcode/gemfile_lock.py +++ b/src/packagedcode/gemfile_lock.py @@ -340,8 +340,8 @@ def get_option(s): '%(NAME_VERSION)s' '$' % locals()).match -PLATS = re.compile('^ (?P.*)$').match -BUNDLED_WITH = re.compile('^\s+(?P(?:\d+.)+\d+)\s*$').match +PLATS = re.compile(r'^ (?P.*)$').match +BUNDLED_WITH = re.compile(r'^\s+(?P(?:\d+.)+\d+)\s*$').match class GemfileLockParser: diff --git a/src/packagedcode/recognize.py b/src/packagedcode/recognize.py index c7e794ecaf6..5b629c01588 100644 --- a/src/packagedcode/recognize.py +++ b/src/packagedcode/recognize.py @@ -16,7 +16,7 @@ from packagedcode import ALL_DATAFILE_HANDLERS from packagedcode import models -TRACE = False or os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False) +TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False) def logger_debug(*args): @@ -62,7 +62,6 @@ def recognize_package_data( datafile_handlers = APPLICATION_PACKAGE_DATAFILE_HANDLERS elif system: datafile_handlers = SYSTEM_PACKAGE_DATAFILE_HANDLERS - return list(_parse(location, datafile_handlers=datafile_handlers)) @@ -78,6 +77,9 @@ def _parse( """ for handler in datafile_handlers: + if TRACE: + logger_debug(f'_parse:.is_datafile: {handler}') + if not handler.is_datafile(location): continue diff --git a/src/packagedcode/rpm.py b/src/packagedcode/rpm.py index bcbe8294070..50be812978c 100644 --- a/src/packagedcode/rpm.py +++ b/src/packagedcode/rpm.py @@ -233,6 +233,9 @@ class RpmInstalledNdbDatabaseHandler(BaseRpmInstalledDatabaseHandler): # TODO: add dependencies!!! class RpmInstalledSqliteDatabaseHandler(BaseRpmInstalledDatabaseHandler): # used by newer RHEL/CentOS/Fedora/CoreOS + # Filetype: SQLite 3.x database, ... + # Mimetype: application/vnd.sqlite3 + datasource_id = 'rpm_installed_database_sqlite' path_patterns = ('*rpm/rpmdb.sqlite',) default_package_type = 'rpm' diff --git a/src/summarycode/generated.py b/src/summarycode/generated.py index 7f29f519f62..e4937eb719e 100644 --- a/src/summarycode/generated.py +++ b/src/summarycode/generated.py @@ -176,6 +176,17 @@ def generated_scanner(location, **kwargs): # protoc 'generated by the protocol buffer compiler', 'generated by the protocol buffer compiler. do not edit!', + 'generated by the protocol buffer compiler. do not edit!', + 'code generated by protoc-gen-go. do not edit.', + 'code generated by generate-types. do not edit.', + 'code generated by generate-protos. do not edit.', + 'do not edit -- your changes will be discarded when the file is', # next line: * regenerated. + 'do not edit -- generated code', + 'generated code -- do not edit!', + 'autogenerated by method_dump.sh. do not edit by hand.', + 'this file is generated from the .proto files for the well-known', # next line: types. Do not edit! + 'this file was generated by upbc (the upb compiler) from the input', + 'do not edit -- your changes will be discarded when the file is', # next line: regenerated # autotools 'makefile.in generated by automake', @@ -191,6 +202,7 @@ def generated_scanner(location, **kwargs): 'generated by:javacc: do not edit this line', 'generated by:jjtree: do not edit this line', 'generated code (do not edit this line)', + )) diff --git a/src/textcode/strings.py b/src/textcode/strings.py index 2e3d470f9b2..db9e81bc36f 100644 --- a/src/textcode/strings.py +++ b/src/textcode/strings.py @@ -180,33 +180,33 @@ def is_shared_object(s): return so(s) +# TODO: implement me +_posix = re.compile('^/[\\w_\\-].*$', re.IGNORECASE).match def is_posix_path(s): """ Return True if s looks like a posix path. Example: /usr/lib/librt.so.1 or /usr/lib """ - # TODO: implement me - posix = re.compile('^/[\\w_\\-].*$', re.IGNORECASE).match - posix(s) - return False + return _posix(s) +# TODO: implement me +_relative = re.compile('^(?:([^/]|\\.\\.)[\\w_\\-]+/.*$)', re.IGNORECASE).match def is_relative_path(s): """ Return True if s looks like a relative posix path. Example: usr/lib/librt.so.1 or ../usr/lib """ - relative = re.compile('^(?:([^/]|\\.\\.)[\\w_\\-]+/.*$)', re.IGNORECASE).match - return relative(s) + return bool(_relative(s)) +_winpath = re.compile('^[\\w_\\-]+\\.so\\.[0-9]+\\.*.[0-9]*$', re.IGNORECASE).match def is_win_path(s): """ Return True if s looks like a win path. Example: c:\\usr\\lib\\librt.so.1. """ - winpath = re.compile('^[\\w_\\-]+\\.so\\.[0-9]+\\.*.[0-9]*$', re.IGNORECASE).match - return winpath(s) + return _winpath(s) def is_c_source(s): diff --git a/src/textcode/strings2.py b/src/textcode/strings2.py index fdfb583a57e..52f0a043660 100644 --- a/src/textcode/strings2.py +++ b/src/textcode/strings2.py @@ -23,8 +23,8 @@ import re ASCII_BYTE = ( - " !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t" + r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + r"\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t" ) diff --git a/tests/cluecode/data/copyrights/misco2/pom.xml b/tests/cluecode/data/copyrights/misco2/pom.xml new file mode 100644 index 00000000000..48b8608127f --- /dev/null +++ b/tests/cluecode/data/copyrights/misco2/pom.xml @@ -0,0 +1,4 @@ + + diff --git a/tests/cluecode/data/copyrights/misco2/pom.xml.yml b/tests/cluecode/data/copyrights/misco2/pom.xml.yml new file mode 100644 index 00000000000..6902e5f47cf --- /dev/null +++ b/tests/cluecode/data/copyrights/misco2/pom.xml.yml @@ -0,0 +1,4 @@ +what: + - copyrights +copyrights: + - Copyright (c) 2016 The Android Open Source Project diff --git a/tests/packagedcode/test_gemfile_lock.py b/tests/packagedcode/test_gemfile_lock.py index f3eb9e336e7..226c3c9da66 100644 --- a/tests/packagedcode/test_gemfile_lock.py +++ b/tests/packagedcode/test_gemfile_lock.py @@ -274,7 +274,7 @@ def test_Gem_as_nv_tree(self): } } } - self.assertEqual(expected, a.as_nv_tree()) + assert a.as_nv_tree() == expected def test_Gem_flatten(self): Gem = gemfile_lock.Gem @@ -320,7 +320,6 @@ def test_Gem_as_nv_tree_with_no_deps(self): results = a.as_nv_tree() assert results == expected - def test_Gem_to_dict(self): Gem = gemfile_lock.Gem a = Gem('a', '1') diff --git a/tests/packagedcode/test_maven.py b/tests/packagedcode/test_maven.py index 6898261423e..62349097ce4 100644 --- a/tests/packagedcode/test_maven.py +++ b/tests/packagedcode/test_maven.py @@ -250,7 +250,6 @@ def test_maven_unknown_reference_to_license_in_manifest(self): run_scan_click(['--package', '--license', '--license-diagnostics', '--processes', '-1', test_dir, '--json', result_file]) check_json_scan(expected_file, result_file, remove_uuid=True, regen=REGEN_TEST_FIXTURES) - def test_package_dependency_not_missing(self): test_file = self.get_test_loc('maven2/log4j/log4j-pom.xml') self.check_parse_to_package(test_file, regen=REGEN_TEST_FIXTURES) @@ -275,7 +274,7 @@ def test_get_top_level_resources(self): pom_resource = codebase.get_resource( 'activiti-image-generator-7-201802-EA-sources.jar-extract/META-INF/maven/org.activiti/activiti-image-generator/pom.xml' ) - self.assertTrue(pom_resource) + assert pom_resource top_level_resources_paths = [ r.path for r in maven.MavenPomXmlHandler.get_top_level_resources(pom_resource, codebase) ] @@ -288,7 +287,7 @@ def test_get_top_level_resources(self): 'activiti-image-generator-7-201802-EA-sources.jar-extract/META-INF/maven/org.activiti/activiti-image-generator/pom.properties', 'activiti-image-generator-7-201802-EA-sources.jar-extract/META-INF/maven/org.activiti/activiti-image-generator/pom.xml', ] - self.assertEquals(expected_resource_paths, top_level_resources_paths) + assert top_level_resources_paths == expected_resource_paths class TestPomProperties(testcase.FileBasedTesting): diff --git a/tests/packagedcode/test_pypi.py b/tests/packagedcode/test_pypi.py index ba55474fd17..5020e28913b 100644 --- a/tests/packagedcode/test_pypi.py +++ b/tests/packagedcode/test_pypi.py @@ -282,10 +282,10 @@ def test_parse_metadata_prefer_pkg_info_from_egg_info_from_command_line(self): # `celery/celery.egg-info/PKG-INFO` vc = VirtualCodebase(location=result_file) for dep in vc.attributes.dependencies: - self.assertEqual(dep['datafile_path'], 'celery/celery.egg-info/PKG-INFO') + assert dep['datafile_path'] == 'celery/celery.egg-info/PKG-INFO' for pkg in vc.attributes.packages: for path in pkg['datafile_paths']: - self.assertEqual(path, 'celery/celery.egg-info/PKG-INFO') + assert path == 'celery/celery.egg-info/PKG-INFO' class TestPipRequirementsFileHandler(PackageTester): @@ -417,47 +417,23 @@ def test_parse_dependency_file_with_invalid_does_not_fail(self): expected_loc = self.get_test_loc('pypi/requirements_txt/invalid_spec/output.expected.json') self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES) - def test_PipRequirementsFileHandler_is_datafile(self): - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('dev-requirements.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirements.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirement.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirements.in', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirements.pip', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirements-dev.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('some-requirements-dev.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requires.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('requirements/base.txt', _bare_filename=True), - True - ) - self.assertEqual( - pypi.PipRequirementsFileHandler.is_datafile('reqs.txt', _bare_filename=True), - True - ) +@pytest.mark.parametrize( + 'filename', + [ + 'dev-requirements.txt', + 'reqs.txt', + 'requirements/base.txt', + 'requirements-dev.txt', + 'requirements.in', + 'requirements.pip', + 'requirements.txt', + 'requirement.txt', + 'requires.txt', + 'some-requirements-dev.txt', + ] +) +def test_PipRequirementsFileHandler_is_datafile(filename): + assert pypi.PipRequirementsFileHandler.is_datafile(location=filename, _bare_filename=True) class TestPyPiPipfile(PackageTester): @@ -607,7 +583,7 @@ def check_setup_py_parsing(test_loc): expected_loc2 = f'{test_loc}-expected.json' packages_data = pypi.PythonSetupPyHandler.parse(test_loc) - test_envt.check_packages_data( + env.check_packages_data( packages_data=packages_data, expected_loc=expected_loc2, regen=REGEN_TEST_FIXTURES, @@ -615,12 +591,12 @@ def check_setup_py_parsing(test_loc): ) -test_envt = PackageTester() +env = PackageTester() @pytest.mark.parametrize( 'test_loc', - get_setup_py_test_files(os.path.abspath(os.path.join(test_envt.test_data_dir, 'pypi', 'setup.py-versions'))), + get_setup_py_test_files(os.path.abspath(os.path.join(env.test_data_dir, 'pypi', 'setup.py-versions'))), ) def test_parse_setup_py_with_computed_versions(test_loc): check_setup_py_parsing(test_loc) @@ -628,7 +604,7 @@ def test_parse_setup_py_with_computed_versions(test_loc): @pytest.mark.parametrize( 'test_loc', - get_setup_py_test_files(os.path.abspath(os.path.join(test_envt.test_data_dir, 'pypi', 'setup.py'))) + get_setup_py_test_files(os.path.abspath(os.path.join(env.test_data_dir, 'pypi', 'setup.py'))) ) def test_parse_setup_py(test_loc): check_setup_py_parsing(test_loc) @@ -636,7 +612,7 @@ def test_parse_setup_py(test_loc): @pytest.mark.parametrize( 'test_loc', - get_setup_py_test_files(os.path.abspath(os.path.join(test_envt.test_data_dir, 'pypi', 'more_setup.py'))), + get_setup_py_test_files(os.path.abspath(os.path.join(env.test_data_dir, 'pypi', 'more_setup.py'))), ) def test_parse_more_setup_py(test_loc): check_setup_py_parsing(test_loc) diff --git a/tests/packagedcode/test_win_reg.py b/tests/packagedcode/test_win_reg.py index bc0ad2b077d..126a3dd9e71 100644 --- a/tests/packagedcode/test_win_reg.py +++ b/tests/packagedcode/test_win_reg.py @@ -60,14 +60,14 @@ def test_win_reg_remove_drive_letter(self): test_path = 'C:\\Users\\Test\\Desktop' expected_path = 'Users/Test/Desktop' result = remove_drive_letter(test_path) - self.assertEqual(result, expected_path) + assert result == expected_path def test_win_reg_create_absolute_installed_file_path(self): root_dir = '/home/test/c/' test_path = 'C:\\Program Files\\Test Program\\' result = create_absolute_installed_file_path(root_dir, test_path) expected_path = '/home/test/c/Program Files/Test Program' - self.assertEqual(result, expected_path) + assert result == expected_path def test_scan_system_package_end_to_end_installed_win_reg(self): test_dir = self.get_test_loc('win_reg/get_installed_packages_docker/layer') diff --git a/tests/summarycode/test_summarizer.py b/tests/summarycode/test_summarizer.py index 29ef4bfb793..4be0380906c 100644 --- a/tests/summarycode/test_summarizer.py +++ b/tests/summarycode/test_summarizer.py @@ -21,7 +21,6 @@ from summarycode.summarizer import get_primary_language from summarycode.summarizer import get_holders_from_copyright - pytestmark = pytest.mark.scanslow @@ -173,7 +172,6 @@ def test_summary_without_copyright_or_holders(self): ]) check_json_scan(expected_file, result_file, remove_uuid=True, remove_file_date=True, regen=REGEN_TEST_FIXTURES) - def test_remove_from_tallies(self): tallies = [ { @@ -205,7 +203,7 @@ def test_remove_from_tallies(self): } ] result_1 = remove_from_tallies(test_entry_1, copy(tallies)) - assert(result_1, expected_1) + assert result_1 == expected_1 test_entry_2 = [ { @@ -224,7 +222,7 @@ def test_remove_from_tallies(self): }, ] result_2 = remove_from_tallies(test_entry_2, copy(tallies)) - assert(result_2, expected_2) + assert result_2 == expected_2 test_entry_3 = 'apache-2.0' expected_3 = [ @@ -238,7 +236,7 @@ def test_remove_from_tallies(self): } ] result_3 = remove_from_tallies(test_entry_3, copy(tallies)) - assert(result_3, expected_3) + assert result_3 == expected_3 def test_get_primary_language(self): language_tallies = [ @@ -257,19 +255,18 @@ def test_get_primary_language(self): ] expected_1 = 'Python' result_1 = get_primary_language(language_tallies) - assert(result_1, expected_1) + assert result_1 == expected_1 def test_get_holders_from_copyright(self): - test_copyright = 'Copyright (c) 2017, The University of Chicago. All rights reserved.' - expected_1 = ['The University of Chicago'] - result_1 = get_holders_from_copyright(test_copyright) - assert(result_1, expected_1) + test_copyright_string = 'Copyright (c) 2017, The University of Chicago. All rights reserved.' + result = list(get_holders_from_copyright(test_copyright_string)) + assert result == ['The University of Chicago'] - test_copyrights = [ + test_copyright_lines = [ 'Copyright (c) 2017, The University of Chicago. All rights reserved.', 'Copyright (c) MIT', 'Copyright (c) Apache Software Foundation', ] - expected_2 = ['The University of Chicago', 'MIT', 'Apache Software Foundation'] - result_2 = get_holders_from_copyright(test_copyrights) - assert(result_2, expected_2) + + result = list(get_holders_from_copyright(test_copyright_lines)) + assert result == ['The University of Chicago', 'MIT', 'Apache Software Foundation'] diff --git a/tests/textcode/test_strings.py b/tests/textcode/test_strings.py index 4021ffe1859..e174249e99d 100644 --- a/tests/textcode/test_strings.py +++ b/tests/textcode/test_strings.py @@ -11,12 +11,14 @@ import json import os +import pytest from commoncode.testcase import FileBasedTesting from scancode_config import REGEN_TEST_FIXTURES from textcode import strings + class TestStrings(FileBasedTesting): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') @@ -159,14 +161,20 @@ def test_strings_in_all_bin(self): expected_file = os.path.join(expec_dir, tf + '.strings') self.check_file_strings(test_file, expected_file) - def test_is_relative_path(self): + def test_is_relative_path_win(self): # Win Path path = "c:\\usr\\lib\\librt.so.1." - self.assertFalse(strings.is_relative_path(path)) + assert not strings.is_relative_path(path) + + @pytest.mark.xfail(reason="is_relative_path is not implemented on Windows") + def test_is_relative_path_win2(self): + path = "usr\\lib\\librt.so.1." + assert strings.is_relative_path(path) is True + def test_is_relative_path_posix(self): # Relative Posix Path path = "usr/lib/librt.so.1" - self.assertTrue(strings.is_relative_path(path)) + assert strings.is_relative_path(path) is True def test_strings_with_lf(self): test_file = 'strings/with-lf/strings.exe'