-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtest_software.py
281 lines (260 loc) · 14.4 KB
/
test_software.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pytest
import requests_cache
from software import Software
# Install a requests_cache cache named "my_requests_cache" at import time;
# cached entries expire after 60 * 60 * 24 * 7 seconds (7 days).
requests_cache.install_cache(
    "my_requests_cache", expire_after=60 * 60 * 24 * 7
)  # expire_after is in seconds
# Run these tests with pytest.
# Use harvard1 citation style.
# (input, expected citation) pairs. Each pair reaches test_citations as
# (url, expected) through urls_to_test.
# Inputs: two arXiv identifiers (with and without the "arXiv:" prefix) and
# one readthedocs URL.
arxiv_urls = [
    (
        "arXiv:1802.02689",
        "Borgman, C., Scharnhorst, A. & Golshan, M., 2018. Digital Data Archives as Knowledge Infrastructures: Mediating Data Sharing and Reuse. <i>arXiv</i>. Available at: http://arxiv.org/abs/1802.02689v2.",
    ),
    (
        "1807.09464",
        "Duchene, J. et al., 2018. Specification-Based Protocol Obfuscation. <i>arXiv</i>. Available at: http://arxiv.org/abs/1807.09464v1.",
    ),
    (
        "https://freud.readthedocs.io/en/stable/",
        "Ramasubramani, V. et al., 2020. freud: A software suite for high throughput analysis of particle simulation data. <i>Computer Physics Communications</i>, 254, p.107275. Available at: https://doi.org/10.1016/j.cpc.2020.107275.",
    ),
]
# (input, expected citation) pairs for CRAN packages. Each pair reaches
# test_citations as (url, expected) through urls_to_test.
# Inputs cover several spellings of a CRAN package address: scheme-less
# "CRAN.R-project.org/package=...", "/web/packages/<name>" with and without a
# trailing slash or index.html, and an http:// "package=" link.
cran_urls = [
    (
        "CRAN.R-project.org/package=surveillance",
        "Salmon, M., Schumacher, D. & H\xf6hle, M., 2016. Monitoring Count Time Series inR: Aberration Detection in Public Health Surveillance. <i>Journal of Statistical Software</i>, 70(10). Available at: https://doi.org/10.18637/jss.v070.i10.",
    ),
    (
        "CRAN.R-project.org/package=changepoint",
        "Killick, R., Haynes, K. & Eckley, I., 2016. changepoint: Methods for Changepoint Detection. <i>R package version 2.2.2</i>. Available at: https://CRAN.R-project.org/package=changepoint.",
    ),
    (
        "CRAN.R-project.org/package=tidyverse",
        "Wickham, H. et al., 2019. Welcome to the Tidyverse. <i>Journal of Open Source Software</i>, 4(43), p.1686. Available at: https://doi.org/10.21105/joss.01686.",
    ),
    (
        "https://cran.r-project.org/web/packages/BDP2/",
        "Kopp-Schneider, A., Wiesenfarth, M. & Abel, U., 2018. BDP2: Bayesian Adaptive Designs for Phase II Trials with Binary. <i>R package version 0.1.3</i>. Available at: https://CRAN.R-project.org/package=BDP2.",
    ),
    (
        "https://cran.r-project.org/web/packages/vistime/index.html",
        "Raabe, S., 2021. vistime: Pretty Timelines in R. <i>R package version 1.2.1</i>. Available at: https://CRAN.R-project.org/package=vistime.",
    ),
    (
        "http://cran.r-project.org/package=abcrf",
        "Marin, J.-M., 2019. abcrf: Approximate Bayesian Computation via Random Forests. <i>R package version 1.8.1</i>. Available at: https://CRAN.R-project.org/package=abcrf.",
    ),
    (
        "https://cran.r-project.org/web/packages/stringr",
        "Hadley, W., 2019. stringr: Simple, Consistent Wrappers for Common String Operations. <i>R package version 1.4.0</i>. Available at: https://CRAN.R-project.org/package=stringr.",
    ),
]
# (input, expected citation) pairs where the input is a bare DOI. Each pair
# reaches test_citations as (url, expected) through urls_to_test.
doi_urls = [
    (
        "10.1109/5.771073",
        "Paskin, N., 1999. Toward unique identifiers. <i>Proceedings of the IEEE</i>, 87(7), pp.1208–1227. Available at: https://doi.org/10.1109/5.771073.",
    ),
    (
        "10.1093/ajae/aaq063",
        # NOTE(review): "Chavas, J.-. paul ." looks like a mangled author name
        # in the expected output; presumably it mirrors what the service
        # currently returns -- confirm before "fixing" it.
        "Shi, G., Chavas, J.-. paul . & Stiegert, K., 2010. An Analysis of the Pricing of Traits in the U.S. Corn Seed Market. <i>American Journal of Agricultural Economics</i>, 92(5), pp.1324–1338. Available at: https://doi.org/10.1093/ajae/aaq063.",
    ),
]
# (input, expected citation) pairs, mostly GitHub repository URLs. Each pair
# reaches test_citations as (url, expected) through urls_to_test.
# The list also holds one gist.github.com URL and one non-GitHub site
# (https://dbdp.org/).
github_urls = [
    (
        "https://github.com/pvlib/pvlib-python",
        "Holmgren, W. et al., 2020. <i>pvlib/pvlib-python: v0.7.2</i>, Zenodo. Available at: https://doi.org/10.5281/zenodo.3762635.",
    ),
    (
        "https://github.com/gcowan/hyperk",
        "Cowan, G., 2016. Gcowan/Hyperk: Mcp Data Processing Code. Available at: https://doi.org/10.5281/zenodo.160400.",
    ),
    (
        "https://github.com/NSLS-II-XPD/xpdView",
        "Duff, C. & Kaming-Thanassi, J., 2016. Xpdview: Xpdview Initial Release. Available at: https://doi.org/10.5281/zenodo.60479.",
    ),
    (
        # Input uses the "impactstory" owner; the expected citation points at
        # "ourresearch/depsy".
        "https://github.com/impactstory/depsy",
        "OurResearch, 2015. depsy. Available at: https://github.com/ourresearch/depsy.",
    ),
    (
        "https://github.com/abianchetti/blick",
        "Bianchetti, A., 2012. blick. Available at: https://github.com/abianchetti/blick.",
    ),
    (
        "https://github.com/jasonpriem/FeedVis",
        "Priem, J., 2011. FeedVis. Available at: https://github.com/jasonpriem/FeedVis.",
    ),
    (
        "https://github.com/vahtras/loprop",
        "Vahtras, O., 2014. Loprop For Dalton. Available at: https://doi.org/10.5281/zenodo.13276.",
    ),
    (
        "https://github.com/cvitolo/r_BigDataAnalytics",
        "Vitolo, C., 2015. R_Bigdataanalytics V.0.0.1. Available at: https://doi.org/10.5281/zenodo.15722.",
    ),
    (
        "https://github.com/dfm/emcee",
        "Foreman-Mackey, D. et al., 2013. emcee: The MCMC Hammer. <i>Publications of the Astronomical Society of the Pacific</i>, 125(925), pp.306\u2013312. Available at: https://doi.org/10.1086/670067.",
    ),
    (
        "https://github.com/robintw/Py6S",
        "Wilson, R.T., 2013. Py6S: A Python interface to the 6S radiative transfer model. <i>Computers & Geosciences</i>, 51, pp.166–171. Available at: https://doi.org/10.1016/j.cageo.2012.08.002.",
    ),
    (
        # Expected citation has no "Available at:" part.
        "https://github.com/nicholasricci/DDM_Framework",
        "Marzolla, M., D'Angelo, G. & Mandrioli, M., 2013. A Parallel Data Distribution Management Algorithm.",
    ),
    (
        "https://gist.github.com/vegaasen/157fbc6dce8545b7f12c",
        "Aasen, V., 2015. supress-warning-idea.md. Available at: https://gist.github.com/157fbc6dce8545b7f12c.",
    ),
    (
        "https://github.com/cboettig/noise-phenomena",
        'Boettiger, C., 2018. Cboettig/Noise-Phenomena: Supplement To: "From Noise To Knowledge: How Randomness Generates Novel Phenomena And Reveals Information". Available at: https://doi.org/10.5281/zenodo.1219780.',
    ),
    (
        "https://dbdp.org/",
        "Bent, B. et al., 2020. The digital biomarker discovery pipeline: An open-source software platform for the development of digital biomarkers using mHealth and wearables data. <i>Journal of Clinical and Translational Science</i>, 5(1). Available at: https://doi.org/10.1017/cts.2020.511.",
    ),
]
# (input, expected citation) pairs for general web pages (project sites,
# documentation, journal and news pages). Each pair reaches test_citations as
# (url, expected) through urls_to_test.
website_urls = [
    (
        "http://yt-project.org",
        "Turk, M.J. et al., 2010. Yt: A Multi-Code Analysis Toolkit For Astrophysical Simulation Data. <i>The Astrophysical Journal Supplement Series</i>, 192(1), p.9. Available at: https://doi.org/10.1088/0067-0049/192/1/9.",
    ),
    (
        "http://fftw.org/",
        "Frigo, M. & Johnson, S.G., 2005. The Design and Implementation of FFTW3. <i>Proceedings of the IEEE</i>, 93(2), pp.216–231. Available at: https://doi.org/10.1109/jproc.2004.840301.",
    ),
    (
        "www.simvascular.org",
        # The expected string starts with a space (no author precedes the year).
        " 2015. SimVascular. Available at: https://github.com/SimVascular/SimVascular.",
    ),
    (
        "https://bioconductor.org/packages/release/bioc/html/edgeR.html",
        "Yunshun Chen <Yuchen@Wehi.Edu.Au>, A., Davis McCarthy <Dmccarthy@Wehi.Edu.Au>, Xiaobei Zhou <Xiaobei.Zhou@Uzh.Ch>, Mark Robinson<Mark.Robinson@Imls.Uzh.Ch>, Gordon Smyth <Smyth@Wehi.Edu.Au>, 2017. edgeR. Available at: https://doi.org/10.18129/b9.bioc.edger.",
    ),
    (
        "https://slicer.org/",
        "Punzo, D. et al., 2017. SlicerAstro: A 3-D interactive visual analytics tool for HI data. <i>Astronomy and Computing</i>, 19, pp.45–59. Available at: https://doi.org/10.1016/j.ascom.2017.03.004.",
    ),
    (
        "https://vhub.org/resources/puffin",
        "Bursik, M.I. et al., 2013. puffin. Available at: https://vhub.org/resources/puffin.",
    ),
    (
        "https://ccdproc.readthedocs.io/en/latest/",
        "Craig, M. et al., 2017. Astropy/Ccdproc: V1.3.0.Post1. Available at: https://doi.org/10.5281/zenodo.1069648.",
    ),
    (
        "https://photutils.readthedocs.io/",
        "Bradley, L. et al., 2021. <i>astropy/photutils: 1.1.0</i>, Zenodo. Available at: https://doi.org/10.5281/zenodo.596036.",
    ),
    (
        "https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30120-1/fulltext",
        "Dong, E., Du, H. & Gardner, L., 2020. An interactive web-based dashboard to track COVID-19 in real time. <i>The Lancet Infectious Diseases</i>, 20(5), pp.533–534. Available at: https://doi.org/10.1016/s1473-3099(20)30120-1.",
    ),
    (
        "freud.readthedocs.io",
        "Anon, freud — freud 2.6.2 documentation. Available at: http://freud.readthedocs.io.",
    ),
    (
        # NOTE(review): the input carries a literal "null" prefix while the
        # expected citation has the clean URL; presumably a deliberate
        # malformed-input case -- confirm.
        "nullhttps://www.nytimes.com/2021/04/22/climate/biden-emissions-target-economy.html",
        "Anon, Biden Wants to Slash Emissions. Success Would Mean a Very Different America. - The New York Times. Available at: https://www.nytimes.com/2021/04/22/climate/biden-emissions-target-economy.html.",
    ),
    (
        "https://www.thebalancecareers.com/constructive-feedback-to-help-employees-grow-4120943",
        "Anon, How to Provide Feedback to Help Employees Grow Their Skills. Available at: https://www.thebalancecareers.com/constructive-feedback-to-help-employees-grow-4120943.",
    ),
]
# (input, expected citation) pairs where the input is a bare keyword rather
# than a URL or identifier. Each pair reaches test_citations as
# (url, expected) through urls_to_test.
key_word_urls = [
    (
        "pvlib",
        "F. Holmgren, W., W. Hansen, C. & A. Mikofski, M., 2018. pvlib python: a python package for modeling solar energy systems. <i>Journal of Open Source Software</i>, 3(29), p.884. Available at: https://doi.org/10.21105/joss.00884.",
    )
]
# Every (input, expected citation) pair handed to test_citations, in the
# order: arXiv, CRAN, DOI, GitHub, keyword, website.
urls_to_test = [
    *arxiv_urls,
    *cran_urls,
    *doi_urls,
    *github_urls,
    *key_word_urls,
    *website_urls,
]
@pytest.mark.parametrize("url,expected", urls_to_test)
def test_citations(url, expected):
    """Check that the plain citation produced for ``url`` equals ``expected``.

    Runs once per (url, expected) pair in ``urls_to_test``: builds a
    ``Software`` from the input, calls ``find_metadata()`` and compares its
    ``citation_plain`` attribute with the expected string.
    """
    software = Software(url)
    software.find_metadata()
    actual_citation = software.citation_plain
    assert actual_citation == expected
def test_source_preview():
    """Check the highlighted source snapshots reported for the stringr CRAN page.

    Builds a ``Software`` for the stringr package URL, runs
    ``find_metadata()``, and compares the ``source_preview`` entries
    (``title``, ``author``, ``year``) of provenance item 10 in ``to_dict()``
    against fixed HTML snippets.
    """
    my_software = Software("https://cran.r-project.org/web/packages/stringr")
    my_software.find_metadata()
    resp = my_software.to_dict()
    # NOTE(review): the step is picked by position (index 10), so this test
    # depends on the order and number of provenance entries.
    provenance = resp["provenance"][10]["source_preview"]
    assert (
        provenance["title"]
        == '<i>Snapshot of title data found at https://cran.r-project.org/web/packages/stringr/DESCRIPTION.</i><br>Package: stringr<br />Title: <span class="highlight">'
        # The next three segments contain double quotes, so they must be
        # single-quoted; double-quoted delimiters made them a syntax error.
        # NOTE(review): confirm the service emits raw '"' here rather than an
        # HTML entity such as &quot;.
        'Simple, Consistent Wrappers for Common String Operations</span><br />Version: 1.4.0<br />Authors@R: <br /> c(person(given = "Hadley",<br />'
        ' family = "Wickham",<br /> role = c("aut", "cre", "cph"),<br /> '
        'email = "hadley@rstudio.com"),<br /> person(given = "RStudio",<br /> role = c("cph", "fnd")))<br />'
        "Description: A consistent, simple and easy to use set of<br /> wrappers around the fantastic 'stringi"
    )
    assert (
        provenance["author"]
        == "<i>Snapshot of author data found at https://cran.r-project.org/web/packages/stringr/DESCRIPTION.</i><br>Package: "
        "stringr<br />Title: Simple, Consistent Wrappers for Common String Operations<br />Version: 1.4.0<br />Authors@R: <br /> "
        'c(person(given = "<span class="highlight">Hadley</span>",<br /> family = "<span class="highlight">Wickham</span>",<br /> '
        'role = c("aut", "cre", "cph"),<br /> email = "hadley@rstudio.com"),<br /> person(given = "RStudio",<br /> '
        'role = c("cph", "fnd")))<br />Description: A consistent, simple and easy to use set of<br /> wrappers around the fantastic stringi package. All function and<br /> '
        'argument names (and positions) are consistent, all functions deal with<br /> "NA"s and zero length vectors in the same way, and the output from<br /> one function is easy t'
    )
    assert (
        provenance["year"]
        == "<i>Snapshot of year data found at https://cran.r-project.org/web/packages/stringr/DESCRIPTION.</i><br>tringr<br />BugReports: "
        "https://github.com/tidyverse/stringr/issues<br />Depends: R (>= 3.1)<br />Imports: glue (>= 1.2.0), magrittr, stringi (>= 1.1.7)<br />Suggests: covr, htmltools, "
        "htmlwidgets, knitr, rmarkdown, testthat<br />VignetteBuilder: knitr<br />Encoding: UTF-8<br />LazyData: true<br />RoxygenNote: 6.1.1<br />"
        "NeedsCompilation: no<br />Packaged: 2019-02-09 16:03:19 UTC; hadley<br />Author: Hadley Wickham [aut, cre, cph],<br /> RStudio [cph, fnd]<br />"
        'Maintainer: Hadley Wickham <hadley@rstudio.com><br />Repository: CRAN<br />Date/Publication: <span class="highlight">2019-02-10 03:40:03 UTC</span><br />'
    )
def test_provenance():
    """Check the ``has_content`` flag of provenance steps for yt-project.org.

    Builds a ``Software`` for ``http://yt-project.org``, runs
    ``find_metadata()`` and reads ``to_dict()["provenance"]``. Every
    provenance entry whose ``name`` / ``parent_step_name`` matches a listed
    step must have ``has_content`` equal to ``True`` (first list) or ``False``
    (second list).
    """
    software = Software("http://yt-project.org")
    software.find_metadata()
    provenance = software.to_dict()["provenance"]

    steps_with_content = [
        {"step_name": "UserInputStep", "parent_step_name": "NoneType"},
        {"step_name": "WebpageStep", "parent_step_name": "UserInputStep"},
        {"step_name": "GithubRepoStep", "parent_step_name": "WebpageStep"},
        {"step_name": "GithubCitationFileStep", "parent_step_name": "GithubRepoStep"},
        {
            "step_name": "CrossrefResponseStep",
            "parent_step_name": "GithubCitationFileStep",
        },
        {
            "step_name": "CrossrefResponseMetadataStep",
            "parent_step_name": "CrossrefResponseStep",
        },
    ]
    steps_without_content = [
        {"step_name": "CrossrefResponseStep", "parent_step_name": "UserInputStep"},
        {"step_name": "ArxivResponseStep", "parent_step_name": "UserInputStep"},
        {"step_name": "GithubRepoStep", "parent_step_name": "UserInputStep"},
        {"step_name": "BitbucketRepoStep", "parent_step_name": "UserInputStep"},
        {"step_name": "CranLibraryStep", "parent_step_name": "UserInputStep"},
        {"step_name": "PypiLibraryStep", "parent_step_name": "UserInputStep"},
        {"step_name": "RelationHeaderStep", "parent_step_name": "WebpageStep"},
        {"step_name": "CrossrefResponseStep", "parent_step_name": "WebpageStep"},
        {"step_name": "GithubCodemetaFileStep", "parent_step_name": "GithubRepoStep"},
    ]

    # NOTE(review): only entries that are present in provenance are checked;
    # a listed step that never appears does not fail the test.
    checks = (
        (steps_with_content, True),
        (steps_without_content, False),
    )
    for expected_steps, expected_flag in checks:
        for step in expected_steps:
            for entry in provenance:
                # Skip entries that are not the (name, parent) pair under test.
                if (
                    entry["name"] != step["step_name"]
                    or entry["parent_step_name"] != step["parent_step_name"]
                ):
                    continue
                assert entry["has_content"] is expected_flag