diff --git a/src/licensedcode/cache.py b/src/licensedcode/cache.py
index 000accc8257..a26913ba42e 100644
--- a/src/licensedcode/cache.py
+++ b/src/licensedcode/cache.py
@@ -6,9 +6,10 @@
 # See https://github.com/nexB/scancode-toolkit for support or download.
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
-import click
+
 import os
 import pickle
+from shutil import rmtree
 
 import attr
 
@@ -53,6 +54,7 @@ class LicenseCache:
 
     @staticmethod
     def load_or_build(
+        only_builtin=False,
         licensedcode_cache_dir=licensedcode_cache_dir,
         scancode_cache_dir=scancode_cache_dir,
         force=False,
@@ -81,6 +83,9 @@ def load_or_build(
         directories using the same format that we use for licenses and rules.
         """
         idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR)
+        if only_builtin and os.path.isdir(idx_cache_dir):
+            rmtree(idx_cache_dir)  # drop any index built with additional licenses
+
         create_dir(idx_cache_dir)
         cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME)
 
@@ -119,15 +124,19 @@ def load_or_build(
         # Here, the cache is either stale or non-existing: we need to
         # rebuild all cached data (e.g. mostly the index) and cache it
 
-        additional_directories = []
-        plugin_directories = get_paths_to_installed_licenses_and_rules()
-        if plugin_directories:
-            additional_directories.extend(plugin_directories)
+        additional_directories = []  # must be bound on both branches below
+        if only_builtin:
+            additional_directory = None  # ignore any caller-supplied directory
+            plugin_directories = []
+        else:
+            plugin_directories = get_paths_to_installed_licenses_and_rules()
+            if plugin_directories:
+                additional_directories.extend(plugin_directories)
 
-        # include installed licenses
-        if additional_directory:
-            # additional_directories is originally a tuple
-            additional_directories.append(additional_directory)
+            # include installed licenses
+            if additional_directory:
+                # additional_directories is originally a tuple
+                additional_directories.append(additional_directory)
 
         additional_license_dirs = get_license_dirs(additional_dirs=additional_directories)
         validate_additional_license_data(
@@ -355,7 +364,12 @@ def build_unknown_spdx_symbol(licenses_db=None):
     return LicenseSymbolLike(licenses_db['unknown-spdx'])
 
 
-def get_cache(force=False, index_all_languages=False, additional_directory=None):
+def get_cache(
+    force=False,
+    index_all_languages=False,
+    additional_directory=None,
+    only_builtin=False,  # appended last so positional callers keep working
+):
     """
     Return a LicenseCache either rebuilt, cached or loaded from disk.
 
@@ -364,13 +378,19 @@ def get_cache(force=False, index_all_languages=False, additional_directory=None)
     texts and rules (the default)
     """
     return populate_cache(
+        only_builtin=only_builtin,
         force=force,
         index_all_languages=index_all_languages,
         additional_directory=additional_directory,
     )
 
 
-def populate_cache(force=False, index_all_languages=False, additional_directory=None):
+def populate_cache(
+    force=False,
+    index_all_languages=False,
+    additional_directory=None,
+    only_builtin=False,  # appended last so positional callers keep working
+):
     """
     Return, load or build and cache a LicenseCache.
     """
@@ -378,6 +398,7 @@ def populate_cache(force=False, index_all_languages=False, additional_directory=
 
     if force or not _LICENSE_CACHE:
         _LICENSE_CACHE = LicenseCache.load_or_build(
+            only_builtin=only_builtin,
             licensedcode_cache_dir=licensedcode_cache_dir,
             scancode_cache_dir=scancode_cache_dir,
             force=force,
@@ -407,11 +428,17 @@ def load_cache_file(cache_file):
             raise Exception(msg) from e
 
 
-def get_index(force=False, index_all_languages=False, additional_directory=None):
+def get_index(
+    force=False,
+    index_all_languages=False,
+    additional_directory=None,
+    only_builtin=False,  # appended last so positional callers keep working
+):
     """
     Return and eventually build and cache a LicenseIndex.
     """
     return get_cache(
+        only_builtin=only_builtin,
         force=force,
         index_all_languages=index_all_languages,
         additional_directory=additional_directory
diff --git a/src/licensedcode/reindex.py b/src/licensedcode/reindex.py
index 47dcc94b01e..a32e5a9233f 100644
--- a/src/licensedcode/reindex.py
+++ b/src/licensedcode/reindex.py
@@ -9,6 +9,8 @@
 
 import click
 
+from commoncode.cliutils import PluggableCommandLineOption
+
 
 @click.command(name='scancode-reindex-licenses')
 @click.option(
@@ -16,6 +18,17 @@
     is_flag=True,
     help='[EXPERIMENTAL] Rebuild the license index including texts all '
          'languages (and not only English) and exit.',
+    cls=PluggableCommandLineOption,
+)
+@click.option(
+    '--only-builtin',
+    is_flag=True,
+    help='Rebuild the license index excluding any additional '
+         'license directory or additional license plugins which '
+         'were added previously, i.e. with only builtin scancode '
+         'license and rules.',
+    conflicting_options=['additional_directory'],
+    cls=PluggableCommandLineOption,
 )
 @click.option(
     '--additional-directory',
@@ -23,9 +36,12 @@
     metavar='DIR',
     help='Include this directory with additional custom licenses and license rules '
          'in the license detection index.',
+    conflicting_options=['only_builtin'],
+    cls=PluggableCommandLineOption,
 )
 @click.help_option('-h', '--help')
 def reindex_licenses(
+    only_builtin,
     all_languages,
     additional_directory,
     *args,
@@ -35,7 +51,12 @@ def reindex_licenses(
 
     from licensedcode.cache import get_index
     click.echo('Rebuilding the license index...')
-    get_index(force=True, index_all_languages=bool(all_languages), additional_directory=additional_directory)
+    get_index(
+        only_builtin=only_builtin,
+        force=True,
+        index_all_languages=bool(all_languages),
+        additional_directory=additional_directory
+    )
     click.echo('Done.')
 
 