Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce output data format versioning #2653 #2682

Merged
merged 11 commits into from
Sep 14, 2021
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ cffi==1.14.5
chardet==4.0.0
click==8.0.1
colorama==0.4.4
commoncode==21.8.27
commoncode==21.8.31
construct==2.10.67
cryptography==3.4.7
debian-inspector==21.5.25
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ install_requires =
chardet >= 3.0.0
click >= 6.7, !=7.0
colorama >= 0.3.9
commoncode >= 21.8.27
commoncode >= 21.8.31
debian-inspector >= 21.5.25
dparse >= 0.5.1
fasteners
Expand Down
14 changes: 14 additions & 0 deletions src/scancode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ def print_version(ctx, param, value):
if not value or ctx.resilient_parsing:
return
click.echo('ScanCode version ' + scancode_config.__version__)
click.echo('Output Format version ' + scancode_config.__output_format_version__)
click.echo('Future Output Format version ' + scancode_config.__future_output_format_version__)
ctx.exit()


Expand Down Expand Up @@ -249,6 +251,13 @@ def validate_depth(ctx, param, value):
'the starting directory. Use 0 for no scan depth limit.',
help_group=cliutils.CORE_GROUP, sort_order=301, cls=PluggableCommandLineOption)

@click.option('--future-format',
is_flag=True,
help='Output the next experimental data format, for JSON and YAML output.'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use "future" rather than "experimental" in the wording, here and elsewhere.

'See CHANGELOG for more details on the changes in this experimental data format.',
help_group=cliutils.OUTPUT_CONTROL_GROUP, sort_order=28, cls=PluggableCommandLineOption)


@click.help_option('-h', '--help',
help_group=cliutils.DOC_GROUP, sort_order=10, cls=PluggableCommandLineOption)

Expand Down Expand Up @@ -345,6 +354,7 @@ def scancode(
verbose,
max_depth,
from_json,
future_format,
timing,
max_in_memory,
test_mode,
Expand Down Expand Up @@ -448,6 +458,7 @@ def scancode(
quiet=quiet,
verbose=verbose,
max_depth=max_depth,
future_format=future_format,
timing=timing,
max_in_memory=max_in_memory,
test_mode=test_mode,
Expand Down Expand Up @@ -484,6 +495,7 @@ def scancode(
def run_scan(
input, # NOQA
from_json=False,
future_format=False,
strip_root=False,
full_root=False,
max_in_memory=10000,
Expand Down Expand Up @@ -590,6 +602,7 @@ def echo_func(*_args, **_kwargs):
quiet=quiet,
verbose=verbose,
from_json=from_json,
future_format=future_format,
timing=timing,
max_in_memory=max_in_memory,
test_mode=test_mode,
Expand Down Expand Up @@ -838,6 +851,7 @@ def echo_func(*_args, **_kwargs):
cle.start_timestamp = start_timestamp
cle.tool_name = 'scancode-toolkit'
cle.tool_version = scancode_config.__version__
cle.output_format_version = scancode_config.__output_format_version__
cle.notice = notice
cle.options = pretty_params or {}

Expand Down
5 changes: 5 additions & 0 deletions src/scancode/cli_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def check_jsonlines_scan(
result_file,
regen=False,
remove_file_date=False,
ignore_headers=False,
):
"""
Check the scan result_file JSON Lines results against the expected_file
Expand All @@ -311,6 +312,10 @@ def check_jsonlines_scan(
expected = json.load(res)

streamline_jsonlines_scan(expected, remove_file_date)

if ignore_headers:
results[0].pop('headers', None)
expected[0].pop('headers', None)

expected = json.dumps(expected, indent=2, separators=(',', ': '))
results = json.dumps(results, indent=2, separators=(',', ': '))
Expand Down
7 changes: 7 additions & 0 deletions src/scancode_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ def _create_dir(location):
# in case package is not installed or we do not have setutools/pkg_resources
# on hand fall back to this version
__version__ = '21.8.4'

# See https://github.com/nexB/scancode-toolkit/issues/2653 for more information
# on the data format version
__output_format_version__ = '1.1'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use 1.0 for `__output_format_version__`.

__future_output_format_version__ = '1.1'


try:
from pkg_resources import get_distribution, DistributionNotFound
try:
Expand Down
8 changes: 4 additions & 4 deletions tests/cluecode/test_plugin_email_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,28 @@ def test_scan_email():
result_file = test_env.get_temp_file('json')
args = ['--email', '--strip-root', test_dir, '--json', result_file]
run_scan_click(args)
check_json_scan(test_env.get_test_loc('plugin_email_url/emails.expected.json'), result_file)
check_json_scan(test_env.get_test_loc('plugin_email_url/emails.expected.json'), result_file, ignore_headers=True)


def test_scan_email_with_threshold():
test_dir = test_env.get_test_loc('plugin_email_url/files')
result_file = test_env.get_temp_file('json')
args = ['--email', '--strip-root', '--max-email', '2', test_dir, '--json', result_file]
run_scan_click(args)
check_json_scan(test_env.get_test_loc('plugin_email_url/emails-threshold.expected.json'), result_file)
check_json_scan(test_env.get_test_loc('plugin_email_url/emails-threshold.expected.json'), result_file, ignore_headers=True)


def test_scan_url():
test_dir = test_env.get_test_loc('plugin_email_url/files')
result_file = test_env.get_temp_file('json')
args = ['--url', '--strip-root', test_dir, '--json', result_file]
run_scan_click(args)
check_json_scan(test_env.get_test_loc('plugin_email_url/urls.expected.json'), result_file)
check_json_scan(test_env.get_test_loc('plugin_email_url/urls.expected.json'), result_file, ignore_headers=True)


def test_scan_url_with_threshold():
test_dir = test_env.get_test_loc('plugin_email_url/files')
result_file = test_env.get_temp_file('json')
args = ['--url', '--strip-root', '--max-url', '2', test_dir, '--json', result_file]
run_scan_click(args)
check_json_scan(test_env.get_test_loc('plugin_email_url/urls-threshold.expected.json'), result_file)
check_json_scan(test_env.get_test_loc('plugin_email_url/urls-threshold.expected.json'), result_file, ignore_headers=True)
4 changes: 2 additions & 2 deletions tests/cluecode/test_plugin_ignore_copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def test_ignore_holders(self):
result_file = self.get_temp_file('json')
expected_file = self.get_test_loc('plugin_ignore_copyrights/holders.expected.json')
run_scan_click(['-c', '--ignore-copyright-holder', 'Regents', '--json-pp', result_file, test_dir])
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False, ignore_headers=True)

def test_ignore_authors(self):
test_dir = self.extract_test_tar('plugin_ignore_copyrights/basic.tgz')
result_file = self.get_temp_file('json')
expected_file = self.get_test_loc('plugin_ignore_copyrights/authors.expected.json')
run_scan_click(['-c', '--ignore-author', 'Berkeley', '--json-pp', result_file, test_dir])
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False, ignore_headers=True)
8 changes: 4 additions & 4 deletions tests/formattedcode/test_output_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_json_pretty_print():
args = ['-clip', test_dir, '--json-pp', result_file]
run_scan_click(args)
expected = test_env.get_test_loc('json/simple-expected.jsonpp')
check_json_scan(expected, result_file, remove_file_date=True, regen=False)
check_json_scan(expected, result_file, remove_file_date=True, regen=False, ignore_headers=True)


def test_json_compact():
Expand All @@ -37,7 +37,7 @@ def test_json_compact():
with open(result_file, 'rb') as res:
assert len(res.read().splitlines()) == 1
expected = test_env.get_test_loc('json/simple-expected.json')
check_json_scan(expected, result_file, remove_file_date=True, regen=False)
check_json_scan(expected, result_file, remove_file_date=True, regen=False, ignore_headers=True)


@pytest.mark.scanslow
Expand All @@ -46,7 +46,7 @@ def test_scan_output_does_not_truncate_copyright_json():
result_file = test_env.get_temp_file('test.json')
run_scan_click(['-clip', '--strip-root', test_dir, '--json-pp', result_file])
expected = test_env.get_test_loc('json/tree/expected.json')
check_json_scan(expected, result_file, remove_file_date=True, regen=False)
check_json_scan(expected, result_file, remove_file_date=True, regen=False, ignore_headers=True)


@pytest.mark.scanslow
Expand All @@ -56,7 +56,7 @@ def test_scan_output_does_not_truncate_copyright_with_json_to_stdout():
args = ['-clip', '--strip-root', test_dir, '--json-pp', result_file]
run_scan_click(args)
expected = test_env.get_test_loc('json/tree/expected.json')
check_json_scan(expected, result_file, remove_file_date=True, regen=False)
check_json_scan(expected, result_file, remove_file_date=True, regen=False, ignore_headers=True)


@pytest.mark.scanslow
Expand Down
2 changes: 1 addition & 1 deletion tests/formattedcode/test_output_jsonlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_jsonlines():
expected = test_env.get_test_loc('json/simple-expected.jsonlines')
check_jsonlines_scan(
test_env.get_test_loc(expected), result_file,
remove_file_date=True, regen=False)
remove_file_date=True, regen=False, ignore_headers=True)


def test_jsonlines_with_timing():
Expand Down
20 changes: 10 additions & 10 deletions tests/licensedcode/test_plugin_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_license_option_reports_license_expressions():
args = ['--license', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/license-expression/scan.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_option_reports_license_texts():
Expand All @@ -42,7 +42,7 @@ def test_license_option_reports_license_texts():
args = ['--license', '--license-text', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/text/scan.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_option_reports_license_texts_diag():
Expand All @@ -51,7 +51,7 @@ def test_license_option_reports_license_texts_diag():
args = ['--license', '--license-text', '--license-text-diagnostics', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/text/scan-diag.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_option_reports_license_texts_long_lines():
Expand All @@ -60,7 +60,7 @@ def test_license_option_reports_license_texts_long_lines():
args = ['--license', '--license-text', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/text_long_lines/scan.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_option_reports_license_texts_diag_long_lines():
Expand All @@ -69,7 +69,7 @@ def test_license_option_reports_license_texts_diag_long_lines():
args = ['--license', '--license-text', '--license-text-diagnostics', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/text_long_lines/scan-diag.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_match_reference():
Expand All @@ -78,7 +78,7 @@ def test_license_match_reference():
args = ['--license', '--license-text', '--license-text-diagnostics', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/license_reference/scan-ref.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_license_match_without_reference():
Expand All @@ -87,7 +87,7 @@ def test_license_match_without_reference():
args = ['--license', '--license-text', '--license-text-diagnostics', '--strip-root', test_dir, '--json', result_file, '--verbose']
run_scan_click(args)
test_loc = test_env.get_test_loc('plugin_license/license_reference/scan-wref.expected.json')
check_json_scan(test_loc, result_file, regen=False)
check_json_scan(test_loc, result_file, regen=False, ignore_headers=True)


def test_get_referenced_filenames():
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_scan_license_with_url_template():
test_dir, '--json-pp', result_file]
test_loc = test_env.get_test_loc('plugin_license/license_url.expected.json')
run_scan_click(args)
check_json_scan(test_loc, result_file)
check_json_scan(test_loc, result_file, ignore_headers=True)


@pytest.mark.scanslow
Expand All @@ -155,7 +155,7 @@ def test_detection_does_not_timeout_on_sqlite3_amalgamation():
expected_file = test_env.get_test_loc('plugin_license/sqlite/sqlite.expected.json')
# we use the default 120 seconds timeout
run_scan_click(['-l', '--license-text', '--json-pp', result_file, test_dir])
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False, ignore_headers=True)


@pytest.mark.scanslow
Expand All @@ -164,4 +164,4 @@ def test_detection_is_correct_in_legacy_npm_package_json():
result_file = test_env.get_temp_file('json')
expected_file = test_env.get_test_loc('plugin_license/package/package.expected.json')
run_scan_click(['-lp', '--json-pp', result_file, test_dir])
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
check_json_scan(expected_file, result_file, remove_file_date=True, regen=False, ignore_headers=True)
2 changes: 1 addition & 1 deletion tests/licensedcode/test_plugin_license_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ def test_is_licensing_works():
test_dir, '--json-pp', result_file, '--verbose']
run_scan_click(args)
check_json_scan(test_env.get_test_loc('plugin_license_text/scan.expected.json'),
result_file, remove_file_date=True, regen=False)
result_file, remove_file_date=True, regen=False, ignore_headers=True)
4 changes: 2 additions & 2 deletions tests/packagedcode/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def test_end2end_scan_can_detect_bazel(self):
expected_file = self.get_test_loc('bazel/end2end-expected.json')
result_file = self.get_temp_file('results.json')
run_scan_click(['--package', test_file, '--json-pp', result_file])
check_json_scan(expected_file, result_file, regen=False)
check_json_scan(expected_file, result_file, regen=False, ignore_headers=True)

def test_end2end_scan_can_detect_buck(self):
test_file = self.get_test_loc('buck/end2end')
expected_file = self.get_test_loc('buck/end2end-expected.json')
result_file = self.get_temp_file('results.json')
run_scan_click(['--package', test_file, '--json-pp', result_file])
check_json_scan(expected_file, result_file, regen=False)
check_json_scan(expected_file, result_file, regen=False, ignore_headers=True)

def test_build_get_package_resources(self):
test_loc = self.get_test_loc('get_package_resources')
Expand Down
Loading