Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Omnibus license updates July/Aug 21 #2626

Merged
merged 21 commits into from
Aug 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
19 changes: 13 additions & 6 deletions etc/scripts/licenses/synclic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from commoncode import fileutils

import licensedcode
from licensedcode import models
from licensedcode.models import load_licenses
from licensedcode.models import License

Expand Down Expand Up @@ -76,6 +77,7 @@ def _clean(licenses):
lic.notes = clean_text(lic.notes)

if updated:
models.update_ignorables(lic, verbose=False)
lic.dump()

for lics in [self.by_key, self.non_english_by_key]:
Expand Down Expand Up @@ -174,6 +176,7 @@ def get_licenses(self, scancode_licenses=None, **kwargs):
try:
with io.open(lic.text_file, 'w', encoding='utf-8')as tf:
tf.write(text)
models.update_ignorables(lic, verbose=False)
lic.dump()
licenses.append(lic)
except:
Expand Down Expand Up @@ -546,7 +549,7 @@ def build_license(self, mapping, scancode_licenses):
# instead each part of the combo
dejacode_special_composites = set([
'intel-bsd-special',
#'newlib-subdirectory',
# 'newlib-subdirectory',
])
is_component_license = mapping.get('is_component_license') or False

Expand Down Expand Up @@ -816,8 +819,8 @@ def license_to_dict(lico):


def merge_licenses(
scancode_license,
external_license,
scancode_license,
external_license,
updatable_attributes,
from_spdx=False,
):
Expand Down Expand Up @@ -948,7 +951,7 @@ def update_external(_attrib, _sc_val, _ext_val):
# on difference, the other license wins
if scancode_value != external_value:
# unless we have SPDX ids
if attrib== 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
if attrib == 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
update_external(attrib, scancode_value, external_value)
else:
update_scancode(attrib, scancode_value, external_value)
Expand Down Expand Up @@ -1138,10 +1141,14 @@ def synchronize_licenses(scancode_licenses, external_source, use_spdx_key=False,

# finally write changes in place for updates and news
for k in updated_in_scancode | added_to_scancode:
scancodes_by_key[k].dump()
lic = scancodes_by_key[k]
models.update_ignorables(lic, verbose=False)
lic.dump()

for k in updated_in_external | added_to_external:
externals_by_key[k].dump()
lic = externals_by_key[k]
# models.update_ignorables(lic, verbose=False)
lic.dump()

# TODO: at last: print report of incorrect OTHER licenses to submit
# updates eg. make API calls to DejaCode to create or update
Expand Down
85 changes: 52 additions & 33 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,9 @@ def from_node(
(r'^Create$', 'NN'),
(r'^Engine\.$', 'NN'),
(r'^While$', 'NN'),
(r'^Review', 'NN'),
(r'^Help', 'NN'),
(r'^Web', 'NN'),

# alone this is not enough for an NNP
(r'^Free$', 'NN'),
Expand Down Expand Up @@ -1302,34 +1305,34 @@ def from_node(
(r'^LIMITED[,\.]??$', 'COMP'),

# Caps company suffixes
(r'^INC\.?,?\)?$', 'COMP'),
(r'^INCORPORATED\.?,?\)?$', 'COMP'),
(r'^CORP\.?,?\)?$', 'COMP'),
(r'^CORPORATION\.?,?\)?$', 'COMP'),
(r'^FOUNDATION\.?,?$', 'COMP'),
(r'^GROUP\.?,?$', 'COMP'),
(r'^COMPANY\.?,?$', 'COMP'),
(r'^\(tm\).?$', 'COMP'),
(r'^[Ff]orum\.?,?', 'COMP'),
(r'^INC[\.,\)]*$', 'COMP'),
(r'^INCORPORATED[\.,\)]*$', 'COMP'),
(r'^CORP[\.,\)]*$', 'COMP'),
(r'^CORPORATION[\.,\)]*$', 'COMP'),
(r'^FOUNDATION[\.,\)]*$', 'COMP'),
(r'^GROUP[\.,\)]*$', 'COMP'),
(r'^COMPANY[\.,\)]*$', 'COMP'),
(r'^\(tm\)[\.,]?$', 'COMP'),
(r'^[Ff]orum[\.,\)]*', 'COMP'),

# company suffix
(r'^[Cc]orp\.?,?\)?$', 'COMP'),
(r'^[Cc]orp(oration|\.,?)?\)?$', 'COMP'),
(r'^[Cc][oO]\.,?$', 'COMP'),
(r'^[Cc]orporations?\.?,?$', 'COMP'),
(r'^[Ff]oundation\.?,?$', 'COMP'),
(r'^[Aa]lliance\.?,?$', 'COMP'),
(r'^[Cc]orp[\.,\)]*$', 'COMP'),
(r'^[Cc]orporation[\.,\)]*$', 'COMP'),
(r'^[Cc][oO][\.,\)]*$', 'COMP'),
(r'^[Cc]orporations?[\.,\)]*$', 'COMP'),
(r'^[Cc]onsortium[\.,\)]*$', 'COMP'),

(r'^[Ff]oundation[\.,\)]*$', 'COMP'),
(r'^[Aa]lliance[\.,\)]*$', 'COMP'),
(r'^Working$', 'COMP'),
(r'^[Gg]roup\.?,?$', 'COMP'),
(r'^[Tt]echnology\.?,?$', 'COMP'),
(r'^[Tt]echnologies\.?,?$', 'COMP'),
(r'^[Cc]ommunity\.?,?$', 'COMP'),
(r'^[Cc]ommunities\.?,?$', 'COMP'),
(r'^[Mm]icrosystems\.?,?$', 'COMP'),
(r'^[Pp]rojects?\.?,?$', 'COMP'),
(r'^[Tt]eams?\.?$', 'COMP'),
(r'^[Tt]ech\.?,?$', 'COMP'),
(r"^Limited'?\.?,?$", 'COMP'),
(r'^[Gg]roup[\.,\)]*$', 'COMP'),
(r'^[Tt]echnolog(y|ies)[\.,\)]*$', 'COMP'),
(r'^[Cc]ommunit(y|ies)[\.,\)]*$', 'COMP'),
(r'^[Mm]icrosystems[\.,\)]*$', 'COMP'),
(r'^[Pp]rojects?[\.,\)]*,?$', 'COMP'),
(r'^[Tt]eams?[\.,\)]*$', 'COMP'),
(r'^[Tt]ech[\.,\)]*$', 'COMP'),
(r"^Limited'?[\.,\)]*$", 'COMP'),

# company suffix : LLC, LTD, LLP followed by one extra char
(r'^[Ll][Tt][Dd]\.?,?$', 'COMP'),
Expand All @@ -1341,7 +1344,8 @@ def from_node(

# company suffix : SA, SAS, AG, AB, AS, CO, labs followed by a dot
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe)\.?$', 'COMP'),

# French SARL
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$', 'COMP'),
# company suffix : AS: this is frequent beyond Norway.
(r'^AS.$', 'COMP'),
(r'^AS', 'CAPS'),
Expand All @@ -1361,11 +1365,13 @@ def from_node(
# (dutch and belgian) company suffix
(r'^[Bb]\.?[Vv]\.?|BVBA$', 'COMP'),
# university
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))\)?\.?$', 'UNI'),
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))[\.,\)]*$', 'UNI'),
(r'^UNIVERSITY$', 'UNI'),
(r'^College$', 'UNI'),
# Academia/ie
(r'^[Ac]cademi[ae]s?$', 'UNI'),
# Academy
(r'^[Ac]cademy[\.,\)]*$', 'UNI'),

# institutes
(r'INSTITUTE', 'COMP'),
Expand Down Expand Up @@ -1612,11 +1618,11 @@ def from_node(
# URLS such as <(http://fedorahosted.org/lohit)> or ()
(r'[<\(]https?:.*[>\)]', 'URL'),
# URLS such as ibm.com without a scheme
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|nl|au|biz)\s?\.?$', 'URL2'),
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|se|nl|au|biz)\s?\.?$', 'URL2'),
# TODO: add more extensions: there are so many TLDs these days!
# URL wrapped in () or <>
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)\s?[\.\)>]+$', 'URL'),
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)[\.\)>]?$', 'URL'),
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)\s?[\.\)>]+$', 'URL'),
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)[\.\)>]?$', 'URL'),
# derived from regex in cluecode.finder
(r'<?a?.(href)?.('
r'(?:http|ftp|sftp)s?://[^\s<>\[\]"]+'
Expand Down Expand Up @@ -1762,7 +1768,7 @@ def from_node(
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
COMPANY: {<NNP> <NNP> <CC> <NNP> <COMP> <NNP> <CAPS>}

COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>?} #200
COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>*} #200

# Android Open Source Project, 3Dfx Interactive, Inc.
COMPANY: {<NN>? <NN> <NNP> <COMP>} #205
Expand All @@ -1773,8 +1779,14 @@ def from_node(
# NNP NN NNP NNP COMP COMP')
COMPANY: {<NNP> <NN> <NNP> <NNP> <COMP>+} #207

# was COMPANY {<NNP|CAPS> <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <COMP> <COMP>?} #210
COMPANY: {<NNP|CAPS>+ <COMP>+} #210
# Massachusetts Institute of Technology
COMPANY: {<NNP> <COMP|COMPANY> <OF> <NNP>+} #208

COMPANY: {<NNP|CAPS>+ <COMP|COMPANY>+} #210

# University of Southern California, Information Sciences Institute (ISI)
COMPANY: {<UNI> <OF> <COMPANY> <CAPS>?} #211

COMPANY: {<UNI|NNP> <VAN|OF> <NNP>+ <UNI>?} #220
COMPANY: {<NNP>+ <UNI>} #230
COMPANY: {<UNI> <OF> <NN|NNP>} #240
Expand All @@ -1783,6 +1795,9 @@ def from_node(
# University of Southern California, Information Sciences Institute (ISI)
COMPANY: {<COMPANY> <COMPANY> <CAPS>} #251

# University of Technology
COMPANY: {<UNI> <OF> <COMP|COMPANY>} #252

# GNOME i18n Project for Vietnamese
COMPANY: {<CAPS> <NN> <COMP> <NN> <NNP>} #253

Expand Down Expand Up @@ -1937,6 +1952,9 @@ def from_node(

COMPANY: {<COMPANY> <COMP|COMPANY>} #840

# the Software and Component Technologies group of Trimble Navigation, Ltd.
COMPANY: {<COMPANY> <OF> <COMP|COMPANY>} #840.1

# University Corporation for Advanced Internet Development, Inc.
COMPANY: {<UNI> <COMPANY>} #845

Expand Down Expand Up @@ -2783,6 +2801,7 @@ def refine_names(s, prefixes):
'copyright 2003 m. y.',
'copyright 2001 m. y. name',
'copyright 2001 m. y.',
'copyright help center',
])

################################################################################
Expand Down
3 changes: 3 additions & 0 deletions src/licensedcode/data/licenses/bsd-4-clause-shortened.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ owner: Regents of the University of California
spdx_license_key: BSD-4-Clause-Shortened
other_urls:
- https://metadata.ftp-master.debian.org/changelogs//main/a/arpwatch/arpwatch_2.1a15-7_copyright
ignorable_authors:
- the University of California, Lawrence Berkeley Laboratory and its contributors

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,5 @@ category: Proprietary Free
owner: Thufie
homepage_url: https://thufie.lain.haus/NPL.html
spdx_license_key: LicenseRef-scancode-cooperative-non-violent-4.0
ignorable_authors:
- the Web Service
ignorable_urls:
- https://thufie.lain.haus/NPL.html
Loading