Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
19 changes: 13 additions & 6 deletions etc/scripts/licenses/synclic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from commoncode import fileutils

import licensedcode
from licensedcode import models
from licensedcode.models import load_licenses
from licensedcode.models import License

Expand Down Expand Up @@ -76,6 +77,7 @@ def _clean(licenses):
lic.notes = clean_text(lic.notes)

if updated:
models.update_ignorables(lic, verbose=False)
lic.dump()

for lics in [self.by_key, self.non_english_by_key]:
Expand Down Expand Up @@ -174,6 +176,7 @@ def get_licenses(self, scancode_licenses=None, **kwargs):
try:
with io.open(lic.text_file, 'w', encoding='utf-8')as tf:
tf.write(text)
models.update_ignorables(lic, verbose=False)
lic.dump()
licenses.append(lic)
except:
Expand Down Expand Up @@ -546,7 +549,7 @@ def build_license(self, mapping, scancode_licenses):
# instead each part of the combo
dejacode_special_composites = set([
'intel-bsd-special',
#'newlib-subdirectory',
# 'newlib-subdirectory',
])
is_component_license = mapping.get('is_component_license') or False

Expand Down Expand Up @@ -816,8 +819,8 @@ def license_to_dict(lico):


def merge_licenses(
scancode_license,
external_license,
scancode_license,
external_license,
updatable_attributes,
from_spdx=False,
):
Expand Down Expand Up @@ -948,7 +951,7 @@ def update_external(_attrib, _sc_val, _ext_val):
# on difference, the other license wins
if scancode_value != external_value:
# unless we have SPDX ids
if attrib== 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
if attrib == 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
update_external(attrib, scancode_value, external_value)
else:
update_scancode(attrib, scancode_value, external_value)
Expand Down Expand Up @@ -1138,10 +1141,14 @@ def synchronize_licenses(scancode_licenses, external_source, use_spdx_key=False,

# finally write changes in place for updates and news
for k in updated_in_scancode | added_to_scancode:
scancodes_by_key[k].dump()
lic = scancodes_by_key[k]
models.update_ignorables(lic, verbose=False)
lic.dump()

for k in updated_in_external | added_to_external:
externals_by_key[k].dump()
lic = externals_by_key[k]
# models.update_ignorables(lic, verbose=False)
lic.dump()

# TODO: at last: print report of incorrect OTHER licenses to submit
# updates eg. make API calls to DejaCode to create or update
Expand Down
85 changes: 52 additions & 33 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,9 @@ def from_node(
(r'^Create$', 'NN'),
(r'^Engine\.$', 'NN'),
(r'^While$', 'NN'),
(r'^Review', 'NN'),
(r'^Help', 'NN'),
(r'^Web', 'NN'),

# alone this is not enough for an NNP
(r'^Free$', 'NN'),
Expand Down Expand Up @@ -1302,34 +1305,34 @@ def from_node(
(r'^LIMITED[,\.]??$', 'COMP'),

# Caps company suffixes
(r'^INC\.?,?\)?$', 'COMP'),
(r'^INCORPORATED\.?,?\)?$', 'COMP'),
(r'^CORP\.?,?\)?$', 'COMP'),
(r'^CORPORATION\.?,?\)?$', 'COMP'),
(r'^FOUNDATION\.?,?$', 'COMP'),
(r'^GROUP\.?,?$', 'COMP'),
(r'^COMPANY\.?,?$', 'COMP'),
(r'^\(tm\).?$', 'COMP'),
(r'^[Ff]orum\.?,?', 'COMP'),
(r'^INC[\.,\)]*$', 'COMP'),
(r'^INCORPORATED[\.,\)]*$', 'COMP'),
(r'^CORP[\.,\)]*$', 'COMP'),
(r'^CORPORATION[\.,\)]*$', 'COMP'),
(r'^FOUNDATION[\.,\)]*$', 'COMP'),
(r'^GROUP[\.,\)]*$', 'COMP'),
(r'^COMPANY[\.,\)]*$', 'COMP'),
(r'^\(tm\)[\.,]?$', 'COMP'),
(r'^[Ff]orum[\.,\)]*', 'COMP'),

# company suffix
(r'^[Cc]orp\.?,?\)?$', 'COMP'),
(r'^[Cc]orp(oration|\.,?)?\)?$', 'COMP'),
(r'^[Cc][oO]\.,?$', 'COMP'),
(r'^[Cc]orporations?\.?,?$', 'COMP'),
(r'^[Ff]oundation\.?,?$', 'COMP'),
(r'^[Aa]lliance\.?,?$', 'COMP'),
(r'^[Cc]orp[\.,\)]*$', 'COMP'),
(r'^[Cc]orporation[\.,\)]*$', 'COMP'),
(r'^[Cc][oO][\.,\)]*$', 'COMP'),
(r'^[Cc]orporations?[\.,\)]*$', 'COMP'),
(r'^[Cc]onsortium[\.,\)]*$', 'COMP'),

(r'^[Ff]oundation[\.,\)]*$', 'COMP'),
(r'^[Aa]lliance[\.,\)]*$', 'COMP'),
(r'^Working$', 'COMP'),
(r'^[Gg]roup\.?,?$', 'COMP'),
(r'^[Tt]echnology\.?,?$', 'COMP'),
(r'^[Tt]echnologies\.?,?$', 'COMP'),
(r'^[Cc]ommunity\.?,?$', 'COMP'),
(r'^[Cc]ommunities\.?,?$', 'COMP'),
(r'^[Mm]icrosystems\.?,?$', 'COMP'),
(r'^[Pp]rojects?\.?,?$', 'COMP'),
(r'^[Tt]eams?\.?$', 'COMP'),
(r'^[Tt]ech\.?,?$', 'COMP'),
(r"^Limited'?\.?,?$", 'COMP'),
(r'^[Gg]roup[\.,\)]*$', 'COMP'),
(r'^[Tt]echnolog(y|ies)[\.,\)]*$', 'COMP'),
(r'^[Cc]ommunit(y|ies)[\.,\)]*$', 'COMP'),
(r'^[Mm]icrosystems[\.,\)]*$', 'COMP'),
(r'^[Pp]rojects?[\.,\)]*,?$', 'COMP'),
(r'^[Tt]eams?[\.,\)]*$', 'COMP'),
(r'^[Tt]ech[\.,\)]*$', 'COMP'),
(r"^Limited'?[\.,\)]*$", 'COMP'),

# company suffix : LLC, LTD, LLP followed by one extra char
(r'^[Ll][Tt][Dd]\.?,?$', 'COMP'),
Expand All @@ -1341,7 +1344,8 @@ def from_node(

# company suffix : SA, SAS, AG, AB, AS, CO, labs followed by a dot
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe)\.?$', 'COMP'),

# French SARL
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$', 'COMP'),
# company suffix : AS: this is frequent beyond Norway.
(r'^AS.$', 'COMP'),
(r'^AS', 'CAPS'),
Expand All @@ -1361,11 +1365,13 @@ def from_node(
# (dutch and belgian) company suffix
(r'^[Bb]\.?[Vv]\.?|BVBA$', 'COMP'),
# university
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))\)?\.?$', 'UNI'),
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))[\.,\)]*$', 'UNI'),
(r'^UNIVERSITY$', 'UNI'),
(r'^College$', 'UNI'),
# Academia/ie
(r'^[Ac]cademi[ae]s?$', 'UNI'),
# Academy
(r'^[Ac]cademy[\.,\)]*$', 'UNI'),

# institutes
(r'INSTITUTE', 'COMP'),
Expand Down Expand Up @@ -1612,11 +1618,11 @@ def from_node(
# URLS such as <(http://fedorahosted.org/lohit)> or ()
(r'[<\(]https?:.*[>\)]', 'URL'),
# URLS such as ibm.com without a scheme
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|nl|au|biz)\s?\.?$', 'URL2'),
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|se|nl|au|biz)\s?\.?$', 'URL2'),
# TODO: add more extensions: there are so many TLDs these days!
# URL wrapped in () or <>
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)\s?[\.\)>]+$', 'URL'),
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)[\.\)>]?$', 'URL'),
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)\s?[\.\)>]+$', 'URL'),
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)[\.\)>]?$', 'URL'),
# derived from regex in cluecode.finder
(r'<?a?.(href)?.('
r'(?:http|ftp|sftp)s?://[^\s<>\[\]"]+'
Expand Down Expand Up @@ -1762,7 +1768,7 @@ def from_node(
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
COMPANY: {<NNP> <NNP> <CC> <NNP> <COMP> <NNP> <CAPS>}

COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>?} #200
COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>*} #200

# Android Open Source Project, 3Dfx Interactive, Inc.
COMPANY: {<NN>? <NN> <NNP> <COMP>} #205
Expand All @@ -1773,8 +1779,14 @@ def from_node(
# NNP NN NNP NNP COMP COMP')
COMPANY: {<NNP> <NN> <NNP> <NNP> <COMP>+} #207

# was COMPANY {<NNP|CAPS> <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <COMP> <COMP>?} #210
COMPANY: {<NNP|CAPS>+ <COMP>+} #210
# Massachusetts Institute of Technology
COMPANY: {<NNP> <COMP|COMPANY> <OF> <NNP>+} #208

COMPANY: {<NNP|CAPS>+ <COMP|COMPANY>+} #210

# University of Southern California, Information Sciences Institute (ISI)
COMPANY: {<UNI> <OF> <COMPANY> <CAPS>?} #211

COMPANY: {<UNI|NNP> <VAN|OF> <NNP>+ <UNI>?} #220
COMPANY: {<NNP>+ <UNI>} #230
COMPANY: {<UNI> <OF> <NN|NNP>} #240
Expand All @@ -1783,6 +1795,9 @@ def from_node(
# University of Southern California, Information Sciences Institute (ISI)
COMPANY: {<COMPANY> <COMPANY> <CAPS>} #251

# University of Technology
COMPANY: {<UNI> <OF> <COMP|COMPANY>} #252

# GNOME i18n Project for Vietnamese
COMPANY: {<CAPS> <NN> <COMP> <NN> <NNP>} #253

Expand Down Expand Up @@ -1937,6 +1952,9 @@ def from_node(

COMPANY: {<COMPANY> <COMP|COMPANY>} #840

# the Software and Component Technologies group of Trimble Navigation, Ltd.
COMPANY: {<COMPANY> <OF> <COMP|COMPANY>} #840.1

# University Corporation for Advanced Internet Development, Inc.
COMPANY: {<UNI> <COMPANY>} #845

Expand Down Expand Up @@ -2783,6 +2801,7 @@ def refine_names(s, prefixes):
'copyright 2003 m. y.',
'copyright 2001 m. y. name',
'copyright 2001 m. y.',
'copyright help center',
])

################################################################################
Expand Down
3 changes: 3 additions & 0 deletions src/licensedcode/data/licenses/bsd-4-clause-shortened.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ owner: Regents of the University of California
spdx_license_key: BSD-4-Clause-Shortened
other_urls:
- https://metadata.ftp-master.debian.org/changelogs//main/a/arpwatch/arpwatch_2.1a15-7_copyright
ignorable_authors:
- the University of California, Lawrence Berkeley Laboratory and its contributors

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,5 @@ category: Proprietary Free
owner: Thufie
homepage_url: https://thufie.lain.haus/NPL.html
spdx_license_key: LicenseRef-scancode-cooperative-non-violent-4.0
ignorable_authors:
- the Web Service
ignorable_urls:
- https://thufie.lain.haus/NPL.html
Loading