-
-
Notifications
You must be signed in to change notification settings - Fork 562
/
cluecode_test_utils.py
269 lines (216 loc) · 8.29 KB
/
cluecode_test_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import io
from itertools import chain
from os import path
import attr
import pytest
import saneyaml
from commoncode.testcase import FileDrivenTesting
from commoncode.testcase import get_test_file_pairs
from commoncode.text import python_safe_name
from scancode_config import REGEN_TEST_FIXTURES
"""
Data-driven Copyright test utilities.
"""
# Module-level test environment helper. Its test data directory is set to the
# "data" directory located next to this module; that directory is the default
# `test_dir`/`test_data_dir` argument of the functions defined below.
test_env = FileDrivenTesting()
test_env.test_data_dir = path.join(path.dirname(__file__), 'data')
@attr.s(slots=True)
class CopyrightTest(object):
    """
    A copyright detection test is used to verify that copyright detection works
    correctly.

    It consists of two files with the same file name: a .yml file with test data
    and a test file with any other extension (and the same name when removing
    the .yml extension) that needs to be tested for detection.

    The following data are loaded based on or from the .yml file:
     - a test file to scan for copyrights (based on file name conventions),
     - what to test
     - a list of expected copyrights, authors or holders to detect,
     - optional notes.
     - a list of expected_failures

    If a list of expected data is not provided or empty, then this test should
    not detect any such data in the test file.
    """
    data_file = attr.ib(default=None)
    test_file = attr.ib(default=None)
    # one of holders, copyrights, authors
    what = attr.ib(default=attr.Factory(list))
    copyrights = attr.ib(default=attr.Factory(list))
    holders = attr.ib(default=attr.Factory(list))
    authors = attr.ib(default=attr.Factory(list))
    holders_summary = attr.ib(default=attr.Factory(list))
    copyrights_summary = attr.ib(default=attr.Factory(list))
    authors_summary = attr.ib(default=attr.Factory(list))
    expected_failures = attr.ib(default=attr.Factory(list))
    notes = attr.ib(default=None)

    def __attrs_post_init__(self, *args, **kwargs):
        """
        Populate this test from its YAML `data_file`, if one is set.

        Every top-level key with a truthy value in the YAML mapping is set as
        an attribute on self. Any failure while reading, parsing or assigning
        is re-raised as an Exception whose message contains a file:// link to
        the data file, the repr of self and the original traceback.
        """
        if self.data_file:
            try:
                with io.open(self.data_file, encoding='utf-8') as df:
                    for key, value in saneyaml.load(df.read()).items():
                        if value:
                            setattr(self, key, value)
            # Catch Exception rather than everything: a bare except would also
            # trap KeyboardInterrupt and SystemExit and hide them behind a
            # generic Exception.
            except Exception as e:
                import traceback
                msg = (
                    f'file://{self.data_file}\n'
                    f'{self!r}\n' + traceback.format_exc()
                )
                raise Exception(msg) from e

        # fix counts to be ints: saneyaml loads everything as string
        all_summaries = (
            self.holders_summary,
            self.copyrights_summary,
            self.authors_summary,
        )
        for summary in all_summaries:
            for entry in summary:
                entry['count'] = int(entry['count'])

    def to_dict(self):
        """
        Serialize self to a mapping.

        Fields whose name contains "_file" (data_file and test_file) are
        excluded, and so is any field with a false or empty value.
        """
        filtered = [
            field for field in attr.fields(CopyrightTest)
            if '_file' in field.name
        ]
        fields_filter = attr.filters.exclude(*filtered)
        data = attr.asdict(self, filter=fields_filter, dict_factory=dict)
        # do not dump false and empties
        return {key: value for key, value in data.items() if value}

    def dumps(self):
        """
        Return a string representation of self in YAML block format.
        """
        return saneyaml.dump(self.to_dict())

    def dump(self, check_exists=False):
        """
        Dump a representation of self to a .yml data_file in YAML block format.

        Raise an Exception carrying the data_file path if `check_exists` is
        True and the data_file already exists.
        """
        if check_exists and path.exists(self.data_file):
            raise Exception(self.data_file)
        with io.open(self.data_file, 'w', encoding='utf-8') as df:
            df.write(self.dumps())
# YAML skeleton for a copyright test data file. load_copyright_tests() passes
# it to get_test_file_pairs() as `template_to_generate_missing_yaml` when
# `generate_missing` is requested (presumably to create missing .yml files;
# confirm against commoncode.testcase).
COPYRIGHT_TEST_TEMPLATE ="""what:
- copyrights
- holders
- authors
copyrights:
-
holders:
-
"""
def load_copyright_tests(test_dir=test_env.test_data_dir, generate_missing=False):
    """
    Yield CopyrightTest objects built from the (data file, test file) pairs
    found in the known copyright test sub-directories of `test_dir`.

    When `generate_missing` is true, COPYRIGHT_TEST_TEMPLATE is handed to
    get_test_file_pairs() as the template for missing YAML data files;
    otherwise no template is passed.
    """
    subdir_names = (
        'copyrights',
        'ics',
        'holders',
        'authors',
        'years',
        'generated',
        'copyright_fossology',
    )
    yaml_template = COPYRIGHT_TEST_TEMPLATE if generate_missing else None

    for subdir_name in subdir_names:
        file_pairs = get_test_file_pairs(
            path.join(test_dir, subdir_name),
            template_to_generate_missing_yaml=yaml_template,
        )
        for data_file, test_file in file_pairs:
            yield CopyrightTest(data_file, test_file)
def as_sorted_mapping(counter):
    """
    Return a list of {value: val, count: cnt} mappings built from a `counter`
    mapping of {value: count}, sorted by decreasing count and then by
    increasing value.
    """
    ranked = list(counter.items())
    # Negating the count gives a descending order on counts while ties are
    # still broken by ascending value.
    ranked.sort(key=lambda item: (-item[1], item[0]))
    return [{'value': val, 'count': cnt} for val, cnt in ranked]
def make_copyright_test_functions(
    test,
    index,
    test_data_dir=test_env.test_data_dir,
    regen=REGEN_TEST_FIXTURES,
):
    """
    Return a (test function, test name) tuple for the `test` CopyrightTest.

    The function closes over the test arguments. A single function is created
    for all the items listed in `test.what` (e.g. copyrights and holders
    together). The name is derived from the test file path relative to
    `test_data_dir` and from `index`. When `regen` is true, running the
    function also dumps the detected results back to the test data file.
    """
    from cluecode.copyrights import detect_copyrights
    from cluecode.copyrights import Detection
    from summarycode.copyright_tallies import tally_copyrights
    from summarycode.copyright_tallies import tally_persons

    data_file = test.data_file
    test_file = test.test_file

    def closure_test_function(*args, **kwargs):
        copyrights, holders, authors = Detection.split_values(
            detect_copyrights(test_file)
        )

        # Summaries are only computed when explicitly requested in `what`.
        requested = test.what
        holders_summary = (
            as_sorted_mapping(tally_persons(holders))
            if 'holders_summary' in requested else []
        )
        copyrights_summary = (
            as_sorted_mapping(tally_copyrights(copyrights))
            if 'copyrights_summary' in requested else []
        )
        authors_summary = (
            as_sorted_mapping(tally_persons(authors))
            if 'authors_summary' in requested else []
        )

        results = {
            'copyrights': copyrights,
            'authors': authors,
            'holders': holders,
            'holders_summary': holders_summary,
            'copyrights_summary': copyrights_summary,
            'authors_summary': authors_summary,
        }

        # Snapshot the expectations before the test object is overwritten
        # with the actual detection results below.
        expected_yaml = test.dumps()

        for wht in test.what:
            setattr(test, wht, results.get(wht))
        results_yaml = test.dumps()

        if regen:
            test.dump()

        if expected_yaml == results_yaml:
            return

        # Prefix the expectation with file links so that the assertion
        # failure output points at the data and test files.
        file_links = (
            'data file: file://' + data_file +
            '\ntest file: file://' + test_file + '\n'
        )
        expected_yaml = file_links + expected_yaml
        assert results_yaml == expected_yaml

    relative_name = test_file.replace(test_data_dir, '').strip('\\/\\')
    test_name = python_safe_name(f'test_{relative_name}_{index}')
    closure_test_function.__name__ = test_name

    if test.expected_failures:
        closure_test_function = pytest.mark.xfail(closure_test_function)

    return closure_test_function, test_name
def build_tests(
    copyright_tests,
    clazz,
    test_data_dir=test_env.test_data_dir,
    regen=REGEN_TEST_FIXTURES,
):
    """
    Dynamically create one test method per CopyrightTest of the
    `copyright_tests` sequence and attach each method to the `clazz` test
    class. Tests are processed in the order of their test_file.
    """
    ordered_tests = sorted(copyright_tests, key=lambda x: x.test_file)
    for position, copyright_test in enumerate(ordered_tests):
        # A test with expected failures is never regenerated.
        actual_regen = False if copyright_test.expected_failures else regen

        # closure on the test params
        method, name = make_copyright_test_functions(
            test=copyright_test,
            index=position,
            test_data_dir=test_data_dir,
            regen=actual_regen,
        )
        # attach that method to our test class
        setattr(clazz, name, method)