Skip to content

Commit 0531e63

Browse files
committed
Add scancode evaluate script
The goal: check for license offenders in a pull request. This is similar to what astyle does in Travis. We get the list of files being changed. Because scancode does not support scanning a list of files, only a single file or a directory, we copy the changed files to the SCANCODE folder, execute the scancode license check in this folder, and check for offenders. The rules are: code files must have a license and an SPDX identifier. If they don't, we print them and ask for review. It functions nicely; there is just one workaround needed. The SPDX identifier is not always detected correctly, so when scancode reports no SPDX identifier we recheck the file manually in the script. This removes false positives.
1 parent bac5ffe commit 0531e63

File tree

2 files changed

+141
-3
lines changed

2 files changed

+141
-3
lines changed

.travis.yml

+17-3
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,24 @@ matrix:
5555
- <<: *basic-vm
5656
name: "license check"
5757
env: NAME=licence_check
58+
language: python
59+
python: 3.6.8 # scancode-toolkit v3.1.1 requires v3.6.8
60+
install:
61+
- pip install scancode-toolkit==3.1.1
62+
before_script:
63+
- mkdir -p SCANCODE
64+
# Fetch remaining information needed for branch comparison
65+
- git fetch --all --unshallow --tags
66+
- git fetch origin "${TRAVIS_BRANCH}"
5867
script:
59-
- |
60-
! grep --recursive --max-count=100 --ignore-case --exclude .travis.yml \
61-
"gnu general\|gnu lesser\|lesser general\|public license"
68+
# scancode does not support list of files, only one file or directory
69+
# we use SCANCODE directory for all changed files (their copies with full tree)
70+
- >-
71+
git diff --name-only --diff-filter=d FETCH_HEAD..HEAD \
72+
| ( grep '.\(c\|cpp\|h\|hpp\|py\)$' || true ) \
73+
| while read file; do cp --parents "${file}" SCANCODE; done
74+
- scancode -l --json-pp scancode.json SCANCODE
75+
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode.json
6276

6377
- <<: *basic-vm
6478
name: "include check"
+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
SPDX-License-Identifier: Apache-2.0
3+
4+
Copyright (c) 2020 Arm Limited. All rights reserved.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
"""
18+
19+
# Assumptions for this script:
20+
# 1. directory_name is scanned directory.
21+
# Files are copied to this directory with full tree. As result, if we find
22+
# license offender, we can have full path (just scrape directory_name). We do this
23+
# magic because scancode allows to scan directories/one file.
24+
# 2. SPDX and license text is a must for all code files
25+
26+
import json
27+
import argparse
28+
import sys
29+
import os.path
30+
import logging
31+
import re
32+
33+
userlog = logging.getLogger("scancode-evaluate")
userlog.setLevel(logging.INFO)

# Reasons reported next to each offending file.
MISSING_LICENSE_TEXT = "Missing license header"
MISSING_PERMISIVE_LICENSE_TEXT = "Non-permissive license"
MISSING_SPDX_TEXT = "Missing SPDX license identifier"


def _file_contains_spdx_tag(path):
    """Return True if the file at `path` contains an SPDX identifier tag.

    Undecodable bytes are ignored: the tag itself is plain ASCII, so a source
    file in a legacy encoding must not abort the whole check.
    """
    with open(os.path.abspath(path), 'r', errors='ignore') as source_file:
        return "SPDX-License-Identifier" in source_file.read()


def license_check(directory_name, file):
    """ Check licenses in the scancode json file for specified directory

    This function does not verify if file exists, should be done prior the call.

    Every scanned file (directories are skipped) must have at least one
    detected license, all detected licenses must be in the 'Permissive'
    category, and an SPDX identifier must be present. Each violated rule is
    reported once per file, so a file can be listed with more than one reason.

    Args:
    directory_name - where scancode was run; a prefix of this length is
                     stripped from the reported file paths
    file - scancode json output file (output from scancode --license --json-pp)

    Returns:
    0 if success, -1 if any error in file licenses found, or if the json
    file cannot be decoded or does not have the expected structure
    """
    try:
        with open(file, 'r') as scancode_output:
            results = json.load(scancode_output)
    except ValueError:
        userlog.warning("JSON could not be decoded")
        return -1

    # (path, reason) pairs; a fresh pair per finding so reasons never clobber
    # each other.
    offenders = []
    try:
        for scanned in results['files']:
            # ignore directory, not relevant here
            if scanned['type'] == 'directory':
                continue

            path = scanned['path']
            licenses = scanned['licenses']
            if not licenses:
                offenders.append((path, MISSING_LICENSE_TEXT))
                continue

            # Walk every detected license (no short-circuit) so that a
            # malformed entry anywhere in the list is still rejected.
            non_permissive = False
            found_spdx = False
            for detected in licenses:
                if detected['category'] != 'Permissive':
                    non_permissive = True
                # find SPDX, it shall be one of licenses found
                if "spdx" in detected['matched_rule']['identifier']:
                    found_spdx = True

            if non_permissive:
                offenders.append((path, MISSING_PERMISIVE_LICENSE_TEXT))

            # Issue reported here https://github.com/nexB/scancode-toolkit/issues/1913
            # scancode has some problems detecting SPDX when it is part of the
            # license text, so look for the tag in the file itself before
            # reporting it as missing.
            if not found_spdx and not _file_contains_spdx_tag(path):
                offenders.append((path, MISSING_SPDX_TEXT))

    except (KeyError, TypeError):
        # Valid JSON, but not shaped like scancode output.
        userlog.warning("Invalid scancode json file")
        return -1

    if not offenders:
        return 0

    userlog.warning("Found files with missing license details, please review and fix")
    for path, reason in offenders:
        userlog.warning("File: %s reason: %s", path[len(directory_name):], reason)
    return -1
106+
107+
def parse_args(argv=None):
    """ Parse the command line options of the license check script

    Args:
    argv - optional list of argument strings; when None, argparse reads
           the arguments from sys.argv[1:]

    Returns:
    argparse.Namespace with `file` (None when not given) and
    `directory_name` (defaults to "SCANCODE")
    """
    parser = argparse.ArgumentParser(
        description="License check.")
    parser.add_argument('-f', '--file',
                        help="scancode-toolkit output json file")
    parser.add_argument('-d', '--directory_name', default="SCANCODE",
                        help='Directory name where are files being checked')
    return parser.parse_args(argv)
115+
116+
if __name__ == "__main__":
    # Script entry point: the exit status is license_check's result, or -1
    # when the scancode json report was not given or does not exist.
    cli_args = parse_args()
    report_path = cli_args.file

    if not (report_path and os.path.isfile(report_path)):
        userlog.warning("Could not find the scancode json file")
        sys.exit(-1)

    sys.exit(license_check(cli_args.directory_name, report_path))

0 commit comments

Comments
 (0)