-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Added schemas for publisher (Springer) parser * Added schemas for generic parser * Added schemas for enhancer output * Added tests * Added note about schemas in README * ref: cern-sis/issues-scoap3#65
- Loading branch information
Showing
17 changed files
with
711 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from marshmallow import Schema, fields | ||
|
||
|
||
class Abstracts(Schema):
    """Schema for a single abstract entry.

    Both ``value`` and ``source`` are mandatory strings; each one reports its
    own message when it is missing from the input.
    """

    value = fields.String(
        error_messages={"required": "Value in abstracts is required"},
        required=True,
    )
    source = fields.String(
        error_messages={"required": "Source in abstracts is required"},
        required=True,
    )
|
||
|
||
class AquisitionSource(Schema):
    """Schema for the acquisition-source record.

    All four fields are required: ``source``, ``method`` and
    ``submission_number`` are strings, ``date`` is a datetime.

    NOTE: the class name keeps its historical spelling so existing imports
    keep working; the user-facing error messages use the correct spelling,
    "acquisition".
    """

    source = fields.Str(
        required=True,
        error_messages={"required": "Source in acquisition source is required"},
    )
    method = fields.Str(
        required=True,
        error_messages={"required": "Method in acquisition source is required"},
    )
    date = fields.DateTime(
        required=True,
        error_messages={"required": "Date in acquisition source is required"},
    )
    submission_number = fields.Str(
        required=True,
        error_messages={
            "required": "Submission number in acquisition source is required"
        },
    )
|
||
|
||
class CopyRight(Schema):
    """Schema for a single copyright entry.

    ``holder``, ``statement`` and ``material`` are required strings and
    ``year`` is a required integer.
    """

    holder = fields.String(
        error_messages={"required": "Holder in copy right source is required"},
        required=True,
    )
    year = fields.Integer(
        error_messages={"required": "Year in copy right source is required"},
        required=True,
    )
    statement = fields.String(
        error_messages={"required": "Statement in copy right source is required"},
        required=True,
    )
    material = fields.String(
        error_messages={"required": "Material in copy right source is required"},
        required=True,
    )
|
||
|
||
class Imprints(Schema):
    """Schema for a single imprint entry.

    ``date`` is a required date and ``publisher`` a required string. Each
    field has its own "required" message so a validation error names the
    field that is actually missing.
    """

    date = fields.Date(
        required=True,
        error_messages={"required": "Date in imprints source is required"},
    )
    publisher = fields.Str(
        required=True,
        # Previously this repeated the "Date ..." message, which pointed
        # users at the wrong field.
        error_messages={"required": "Publisher in imprints source is required"},
    )
|
||
|
||
class Titles(Schema):
    """Schema for a single title entry.

    ``title``, ``subtitle`` and ``source`` are all required strings; each
    reports a message naming the missing field.
    """

    title = fields.Str(
        required=True,
        error_messages={"required": "Title in titles source is required"},
    )
    subtitle = fields.Str(
        required=True,
        error_messages={"required": "Subtitle in titles source is required"},
    )
    source = fields.Str(
        required=True,
        error_messages={"required": "Source in titles source is required"},
    )
|
||
|
||
class EnhancementSchema(Schema):
    """Top-level schema for the enhancer output.

    Every field is required. ``acquisition_source`` is a single nested
    object, ``record_creation_date`` is a datetime, and the remaining fields
    are lists of nested objects.
    """

    # List-valued sections, each item validated by its own nested schema.
    abstracts = fields.List(
        fields.Nested(Abstracts()),
        required=True,
    )
    # Single nested object (not a list).
    acquisition_source = fields.Nested(
        AquisitionSource(),
        required=True,
    )
    copyright = fields.List(
        fields.Nested(CopyRight()),
        required=True,
    )
    imprints = fields.List(
        fields.Nested(Imprints()),
        required=True,
    )
    record_creation_date = fields.DateTime(
        required=True,
    )
    titles = fields.List(
        fields.Nested(Titles()),
        required=True,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from marshmallow import Schema, fields | ||
from schemas.parser_schema import Affiliations, Author, License, ValueDict | ||
|
||
|
||
class ClassificationNumber(Schema):
    """Schema for one classification number and the standard it belongs to.

    Both fields are required strings.
    """

    classification_number = fields.String(
        required=True,
    )
    standard = fields.String(
        required=True,
    )
|
||
|
||
class Collection(Schema):
    """Schema for one collection entry with a required ``primary`` string."""

    primary = fields.String(
        required=True,
    )
|
||
|
||
class FreeKeyword(Schema):
    """Schema for one free keyword.

    ``source`` and ``value`` are both required strings.
    """

    source = fields.String(
        required=True,
    )
    value = fields.String(
        required=True,
    )
|
||
|
||
class ThesisSupervisor(Schema):
    """Schema for one thesis supervisor.

    Requires a ``full_name`` string and a list of ``affiliations``, each
    validated by the ``Affiliations`` schema.
    """

    affiliations = fields.List(
        fields.Nested(Affiliations()),
        required=True,
    )
    full_name = fields.String(
        required=True,
    )
|
||
|
||
class PublicationInfo(Schema):
    """Schema for one publication-info entry.

    Every field is required. All are strings except ``year``, which is an
    integer.
    """

    artid = fields.String(required=True)

    # Journal identification.
    journal_issue = fields.String(required=True)
    journal_title = fields.String(required=True)
    journal_volume = fields.String(required=True)

    material = fields.String(required=True)

    # Page range is kept as strings, not integers.
    page_end = fields.String(required=True)
    page_start = fields.String(required=True)

    year = fields.Integer(required=True)
|
||
|
||
class GenericParserSchema(Schema):
    """Top-level schema for the generic parser output.

    Every field is required. Scalar fields are strings (``date_published`` is
    a date); list fields hold nested objects validated by their own schemas,
    except ``page_nr``, which is a list of integers.
    """

    abstract = fields.Str(required=True)
    arxiv_eprints = fields.List(fields.Nested(ValueDict()), required=True)
    authors = fields.List(fields.Nested(Author()), required=True)
    classification_numbers = fields.List(
        fields.Nested(ClassificationNumber()), required=True
    )
    collaborations = fields.List(fields.Nested(ValueDict()), required=True)
    collections = fields.List(fields.Nested(Collection()), required=True)
    control_field = fields.Str(required=True)
    copyright_holder = fields.Str(required=True)
    copyright_year = fields.Str(required=True)
    date_published = fields.Date(required=True)
    dois = fields.List(fields.Nested(ValueDict()), required=True)
    free_keywords = fields.List(fields.Nested(FreeKeyword()), required=True)
    license = fields.List(fields.Nested(License()), required=True)
    local_files = fields.List(fields.Nested(ValueDict()), required=True)
    # ``required`` must be set on the List itself: placing it on the inner
    # Int does not make the ``page_nr`` key mandatory, which left this the
    # only optional field in the schema.
    page_nr = fields.List(fields.Int(), required=True)
    publication_info = fields.List(fields.Nested(PublicationInfo()), required=True)
    thesis = fields.Str(required=True)
    thesis_supervisor = fields.List(fields.Nested(ThesisSupervisor()), required=True)
    title = fields.Str(required=True)
    urls = fields.List(fields.Nested(ValueDict()), required=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from marshmallow import Schema, fields | ||
|
||
|
||
class ValueDict(Schema):
    """Schema for an object carrying a single required ``value`` string."""

    value = fields.String(
        required=True,
    )
|
||
|
||
class Affiliations(Schema):
    """Schema for one affiliation.

    ``value``, ``organization`` and ``country`` are all required strings.
    """

    value = fields.String(
        required=True,
    )
    organization = fields.String(
        required=True,
    )
    country = fields.String(
        required=True,
    )
|
||
|
||
class Author(Schema):
    """Schema for one author.

    The name parts, ``email`` and ``full_name`` are required strings.
    ``affiliations`` is a list of ``Affiliations`` objects.
    """

    surname = fields.String(
        required=True,
    )
    given_names = fields.String(
        required=True,
    )
    email = fields.String(
        required=True,
    )
    # The only field here declared without ``required=True``.
    # NOTE(review): presumably intentional so authors without affiliations
    # still validate -- confirm.
    affiliations = fields.List(
        fields.Nested(Affiliations()),
    )
    full_name = fields.String(
        required=True,
    )
|
||
|
||
class License(Schema):
    """Schema for one license entry.

    ``license`` and ``url`` are both required strings.
    """

    license = fields.String(
        required=True,
    )
    url = fields.String(
        required=True,
    )
|
||
|
||
class ParserSchema(Schema):
    """Top-level schema for the publisher parser output.

    Every field is required. Most list fields hold plain strings; the
    exceptions are ``arxiv_eprints`` (``ValueDict`` objects), ``page_nr``
    (integers), ``authors`` and ``thesis_supervisor`` (``Author`` objects)
    and ``license`` (``License`` objects).
    """

    journal_doctype = fields.String(required=True)
    dois = fields.List(fields.String(), required=True)
    arxiv_eprints = fields.List(fields.Nested(ValueDict()), required=True)
    page_nr = fields.List(fields.Integer(), required=True)
    abstract = fields.String(required=True)
    title = fields.String(required=True)
    classification_numbers = fields.List(fields.String(), required=True)
    authors = fields.List(fields.Nested(Author()), required=True)
    collaborations = fields.List(fields.String(), required=True)

    # Journal metadata; only the year is numeric.
    journal_title = fields.String(required=True)
    journal_issue = fields.String(required=True)
    journal_volume = fields.String(required=True)
    journal_artid = fields.String(required=True)
    journal_fpage = fields.String(required=True)
    journal_lpage = fields.String(required=True)
    journal_year = fields.Integer(required=True)

    date_published = fields.Date(required=True)
    related_article_doi = fields.List(fields.String(), required=True)
    copyright_holder = fields.String(required=True)
    # TODO(review): confirm the copyright year really should be a string
    # rather than an integer.
    copyright_year = fields.String(required=True)
    license = fields.List(fields.Nested(License()), required=True)
    collections = fields.List(fields.String(), required=True)
    control_field = fields.List(fields.String(), required=True)
    free_keywords = fields.List(fields.String(), required=True)
    # TODO(review): thesis supervisors reuse the Author schema; confirm they
    # really share the same shape.
    thesis_supervisor = fields.List(fields.Nested(Author()), required=True)
    thesis = fields.List(fields.String(), required=True)
    urls = fields.List(fields.String(), required=True)
    local_files = fields.List(fields.String(), required=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ PyYAML==6.0 | |
furl==2.1.3 | ||
structlog==21.5.0 | ||
bleach==4.1.0 | ||
marshmallow==3.15.0 |
20 changes: 20 additions & 0 deletions
20
tests/units/schemas/data/test_enchancement_schema/correct.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"abstracts": [{ "value": "this is abstracts", "source": "Springer" }], | ||
"acquisition_source": { | ||
"source": "Springer", | ||
"method": "Springer", | ||
"date": "2022-05-20T00:00:00", | ||
"submission_number": "path/to/the/file" | ||
}, | ||
"copyright": [ | ||
{ | ||
"holder": "copyright_holder", | ||
"year": "2020", | ||
"statement": "copyright_statement", | ||
"material": "copyright_material" | ||
} | ||
], | ||
"imprints": [{ "date": "2022-05-20", "publisher": "Springer" }], | ||
"record_creation_date": "2022-05-20T00:00:00", | ||
"titles": [{ "title": "title", "subtitle": "subtitle", "source": "Springer" }] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
18 changes: 18 additions & 0 deletions
18
tests/units/schemas/data/test_enchancement_schema/missing_fields.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"acquisition_source": { | ||
"source": "Springer", | ||
"method": "Springer", | ||
"date": "2022-05-20T00:00:00", | ||
"submission_number": "path/to/the/file" | ||
}, | ||
"copyright": [ | ||
{ | ||
"holder": "copyright_holder", | ||
"year": "2020", | ||
"statement": "copyright_statement", | ||
"material": "copyright_material" | ||
} | ||
], | ||
"record_creation_date": "2022-05-20T00:00:00", | ||
"titles": [{ "title": "title", "subtitle": "subtitle", "source": "Springer" }] | ||
} |
108 changes: 108 additions & 0 deletions
108
tests/units/schemas/data/test_generic_parser_schema/correct.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
{ | ||
"abstract": "Test abstract", | ||
"arxiv_eprints": [ | ||
{ | ||
"value": "Test Eprint" | ||
} | ||
], | ||
"authors": [ | ||
{ | ||
"affiliations": [ | ||
{ | ||
"country": "Test country", | ||
"organization": "Test org", | ||
"value": "Test affiliation" | ||
} | ||
], | ||
"email": "test@email.com", | ||
"full_name": "Test Surname, Test names", | ||
"given_names": "Test names", | ||
"surname": "Test Surname" | ||
} | ||
], | ||
"classification_numbers": [ | ||
{ | ||
"classification_number": "Test classification 1", | ||
"standard": "PACS" | ||
}, | ||
{ | ||
"classification_number": "Test classification 2", | ||
"standard": "PACS" | ||
} | ||
], | ||
"collaborations": [ | ||
{ | ||
"value": "Test collaboration" | ||
} | ||
], | ||
"collections": [ | ||
{ | ||
"primary": "Test Collection" | ||
} | ||
], | ||
"control_field": "Test control field", | ||
"copyright_holder": "Test Copyright", | ||
"copyright_year": "2019", | ||
"date_published": "2019-02-06", | ||
"dois": [ | ||
{ | ||
"value": "Test dois" | ||
}, | ||
{ | ||
"value": "Test related article doi" | ||
} | ||
], | ||
"free_keywords": [ | ||
{ | ||
"source": "author", | ||
"value": "Test free 1" | ||
}, | ||
{ | ||
"source": "author", | ||
"value": "Test free 2" | ||
} | ||
], | ||
"license": [ | ||
{ | ||
"license": "CC-BY-4.0", | ||
"url": "https://creativecommons.org/licenses//by/4.0" | ||
} | ||
], | ||
"local_files": [ | ||
{ | ||
"value": "Test local file" | ||
} | ||
], | ||
"page_nr": [45], | ||
"publication_info": [ | ||
{ | ||
"artid": "Test art-id", | ||
"journal_issue": "2", | ||
"journal_title": "Test title", | ||
"journal_volume": "79", | ||
"material": "article", | ||
"page_end": "45", | ||
"page_start": "1", | ||
"year": 2019 | ||
} | ||
], | ||
"thesis": "Test thesis", | ||
"thesis_supervisor": [ | ||
{ | ||
"affiliations": [ | ||
{ | ||
"country": "Test country", | ||
"organization": "Test org", | ||
"value": "Test affiliation" | ||
} | ||
], | ||
"full_name": "Test Surname, Test names" | ||
} | ||
], | ||
"title": "Test title", | ||
"urls": [ | ||
{ | ||
"value": "test.com" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
Oops, something went wrong.