Skip to content

Commit

Permalink
data model: implement contribution
Browse files Browse the repository at this point in the history
* Implements transformation for Marc21.
* Implements transformation for UNIMARC.
* Adapts transformation for eBooks.
* Adapts ES mapping.
* Adapts JSON schemas.
* Adapts detailed document view.
* Fixes series statement in editor.
* closes rero#1030

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep and rerowep committed Aug 19, 2020
1 parent c12a4df commit aa5e1fe
Show file tree
Hide file tree
Showing 49 changed files with 15,290 additions and 5,663 deletions.
14,877 changes: 10,749 additions & 4,128 deletions data/documents_big.json

Large diffs are not rendered by default.

3,149 changes: 2,289 additions & 860 deletions data/documents_small.json

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,20 +1344,20 @@ def _(x):
terms=dict(field='type',
size=DOCUMENTS_AGGREGATION_SIZE)
),
author__en=dict(
terms=dict(field='facet_authors_en',
contribution__en=dict(
terms=dict(field='facet_contribution_en',
size=DOCUMENTS_AGGREGATION_SIZE)
),
author__fr=dict(
terms=dict(field='facet_authors_fr',
contribution__fr=dict(
terms=dict(field='facet_contribution_fr',
size=DOCUMENTS_AGGREGATION_SIZE)
),
author__de=dict(
terms=dict(field='facet_authors_de',
contribution__de=dict(
terms=dict(field='facet_contribution_de',
size=DOCUMENTS_AGGREGATION_SIZE)
),
author__it=dict(
terms=dict(field='facet_authors_it',
contribution__it=dict(
terms=dict(field='facet_contribution_it',
size=DOCUMENTS_AGGREGATION_SIZE)
),
language=dict(
Expand Down Expand Up @@ -1389,10 +1389,10 @@ def _(x):
'holdings.organisation.organisation_pid'
),
_('library'): and_term_filter('holdings.organisation.library_pid'),
_('author__en'): and_term_filter('facet_authors_en'),
_('author__fr'): and_term_filter('facet_authors_fr'),
_('author__de'): and_term_filter('facet_authors_de'),
_('author__it'): and_term_filter('facet_authors_it'),
_('contribution__en'): and_term_filter('facet_contribution_en'),
_('contribution__fr'): and_term_filter('facet_contribution_fr'),
_('contribution__de'): and_term_filter('facet_contribution_de'),
_('contribution__it'): and_term_filter('facet_contribution_it'),
_('language'): and_term_filter('language.value'),
_('subject'): and_term_filter('facet_subjects'),
_('status'): and_term_filter('holdings.items.status'),
Expand Down Expand Up @@ -1533,8 +1533,8 @@ def _(x):
'documents': {
'title._text.*': 3,
'titlesProper.*': 3,
'authors.name': 2,
'authors.name_*': 2,
'contribution.name': 2,
'contribution.name_*': 2,
'publicationYearText': 2,
'freeFormedPublicationDate': 2,
'subjects.*': 2
Expand Down
2 changes: 1 addition & 1 deletion rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ def extract_series_statement_from_marc_field(self, key, value, data):
"""Extract the seriesStatement data from marc field data.
This function automatically selects the subfield codes according to the field
tag ans the Marc21 or Unimarc format. The extracted data are:
tag in the Marc21 or Unimarc format. The extracted data are:
- seriesTitle
- seriesEnumeration
Expand Down
4 changes: 4 additions & 0 deletions rero_ils/jsonschemas/common/languages-v0.0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,10 @@
"zza"
],
"form": {
"type": "selectWithSort",
"wrappers": [
"form-field-horizontal"
],
"options": [
{
"label": "lang_aar",
Expand Down
5 changes: 5 additions & 0 deletions rero_ils/manual_translations.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,8 @@ _('rdami:1001')
_('rdami:1002')
_('rdami:1003')
_('rdami:1004')

# oneOf contribution types
_('Link to person entity')
_('Person')
_('Organisation')
27 changes: 1 addition & 26 deletions rero_ils/modules/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import multiprocessing
import os
import re
import shutil
import sys
import traceback
from collections import OrderedDict
Expand All @@ -45,7 +44,6 @@
from flask.cli import with_appcontext
from flask_security.confirmable import confirm_user
from invenio_accounts.cli import commit, users
from invenio_app.factory import static_folder
from invenio_db import db
from invenio_jsonschemas.proxies import current_jsonschemas
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
Expand Down Expand Up @@ -820,7 +818,7 @@ def marc21json(xml_file, json_file_ok, xml_file_error, parallel, chunk,
click.secho(' (validation tests pid) ', nl=False)
click.secho(xml_file.name)

path = current_jsonschemas.url_to_path(get_schema_for_resource(type))
path = current_jsonschemas.url_to_path(get_schema_for_resource('doc'))
schema = current_jsonschemas.get_schema(path=path)
schema = _records_state.replace_refs(schema)
transform = Marc21toJson(xml_file, json_file_ok, xml_file_error,
Expand Down Expand Up @@ -1450,26 +1448,3 @@ def export(verbose, pid_type, outfile, pidfile, indent, schema):
click.echo('ERROR: Can not export pid:{pid}'.format(pid=pid))
outfile.write(output)
outfile.write('\n]\n')


@utils.command('set_test_static_folder')
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
@with_appcontext
def set_test_static_folder(verbose):
    """Creates a static folder link for tests."""
    # NOTE: the docstring above is shown by click as the command help text,
    # so the detailed description lives in comments instead.
    #
    # Makes `<sys.prefix>/var/instance/static` a symlink to the folder
    # returned by `static_folder()`, replacing whatever is currently at that
    # path. With `verbose` set, the source and destination are printed.
    click.secho('Create symlink for static folder', fg='green')
    test_static_folder = os.path.join(sys.prefix, 'var/instance/static')
    my_static_folder = static_folder()
    if verbose:
        msg = '\t{src} --> {dst}'.format(
            src=my_static_folder,
            dst=test_static_folder
        )
        click.secho(msg)
    # Drop an existing symlink or plain file. Only OSError is expected here
    # (path missing, or path is a real directory which is removed below); a
    # bare `except:` would also swallow KeyboardInterrupt and SystemExit.
    try:
        os.unlink(test_static_folder)
    except OSError:
        pass
    # Only the parent directory has to exist for the symlink to be created.
    os.makedirs(os.path.dirname(test_static_folder), exist_ok=True)
    # Remove a real directory left at the link location (best effort: if it
    # cannot be removed, os.symlink below reports the failure).
    shutil.rmtree(test_static_folder, ignore_errors=True)
    os.symlink(my_static_folder, test_static_folder)
19 changes: 10 additions & 9 deletions rero_ils/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
from invenio_search.api import RecordsSearch

from .models import DocumentIdentifier, DocumentMetadata
from .utils import edition_format_text, publication_statement_text, \
series_statement_format_text, title_format_text_head
from .utils import create_authorized_access_point, edition_format_text, \
publication_statement_text, series_statement_format_text, \
title_format_text_head
from ..acq_order_lines.api import AcqOrderLinesSearch
from ..api import IlsRecord, IlsRecordsIndexer
from ..fetchers import id_fetcher
Expand Down Expand Up @@ -198,12 +199,12 @@ def index_persons(self, bulk=False):
"""Index all attached persons."""
from ..persons.api import Person
persons_ids = []
for author in self.get('authors', []):
for contribution in self.get('contribution', []):
person = None
ref = author.get('$ref')
ref = contribution['agent'].get('$ref')
if ref:
person = Person.get_record_by_ref(ref)
pid = author.get('pid')
pid = contribution['agent'].get('pid')
if pid:
person = Person.get_record_by_pid(pid)
if person:
Expand Down Expand Up @@ -245,13 +246,13 @@ def get_document_pids_by_issn(cls, issn_number):
def replace_refs(self):
"""Replace $ref with real data."""
from ..persons.api import Person
authors = self.get('authors', [])
for idx, author in enumerate(authors):
ref = author.get('$ref')
contributions = self.get('contribution', [])
for idx, contribution in enumerate(contributions):
ref = contribution['agent'].get('$ref')
if ref:
person = Person.get_record_by_ref(ref)
if person:
authors[idx] = person
contributions[idx]['agent'] = person
return super(Document, self).replace_refs()


Expand Down
132 changes: 96 additions & 36 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from dojson import utils
from dojson.utils import GroupableOrderedDict

from rero_ils.dojson.utils import BookFormatExtraction, ReroIlsMarc21Overdo, \
TitlePartList, add_note, build_responsibility_data, error_print, \
from rero_ils.dojson.utils import ReroIlsMarc21Overdo, TitlePartList, \
add_note, build_responsibility_data, error_print, \
extract_subtitle_and_parallel_titles_from_field_245_b, get_field_items, \
get_field_link_data, join_alternate_graphic_data, make_year, \
not_repetitive, remove_trailing_punctuation
Expand Down Expand Up @@ -103,7 +103,7 @@ def get_person_link(bibid, id, key, value):
request.status_code)

except Exception as err:
error_print('WARNING NOT MEF REF:', bibid, id, key, value)
error_print('WARNING NOT MEF REF:', bibid, id, key, value, err)
return mef_link


Expand Down Expand Up @@ -325,50 +325,110 @@ def marc21_to_titlesProper(self, key, value):
return not_repetitive(marc21.bib_id, key, value, 'a')


@marc21.over('authors', '[17][01]0..')
@marc21.over('contribution', '[17][01][01]..')
@utils.for_each_value
@utils.ignore_value
def marc21_to_author(self, key, value):
"""Get author.
authors: loop:
authors.name: 100$a [+ 100$b if it exists] or
[700$a (+$b if it exists) repetitive] or
[ 710$a repetitive (+$b if it exists, repetitive)]
authors.date: 100 $d or 700 $d (facultatif)
authors.qualifier: 100 $c or 700 $c (facultatif)
authors.type: if 100 or 700 then person, if 710 then organisation
"""
if not key[4] == '2':
author = {}
author['type'] = 'person'
def marc21_to_contribution(self, key, value):
"""Get contribution."""
if not key[4] == '2' and key[:3] in ['100', '700', '710', '711']:
agent = {}
if value.get('0'):
refs = utils.force_list(value.get('0'))
for ref in refs:
ref = get_person_link(marc21.bib_id, ref, key, value)
if ref:
author['$ref'] = ref
agent['$ref'] = ref
# we do not have a $ref
if not author.get('$ref'):
author['name'] = ''
if not agent.get('$ref'):
agent = {'type': 'bf:Person'}
agent['preferred_name'] = ''
if value.get('a'):
data = not_repetitive(marc21.bib_id, key, value, 'a')
author['name'] = remove_trailing_punctuation(data)
author_subs = utils.force_list(value.get('b'))
if author_subs:
for author_sub in author_subs:
author['name'] += ' ' + \
remove_trailing_punctuation(author_sub)
if key[:3] == '710':
author['type'] = 'organisation'
else:
name = not_repetitive(marc21.bib_id, key, value, 'a')
agent['preferred_name'] = name.rstrip('.')

# 100|700 Person
if key[:3] in ['100', '700']:
if value.get('b'):
numeration = not_repetitive(marc21.bib_id, key, value, 'b')
agent['numeration'] = remove_trailing_punctuation(
numeration)
if value.get('c'):
data = not_repetitive(marc21.bib_id, key, value, 'c')
author['qualifier'] = remove_trailing_punctuation(data)
qualifier = not_repetitive(marc21.bib_id, key, value, 'c')
agent['qualifier'] = remove_trailing_punctuation(qualifier)
if value.get('d'):
date = not_repetitive(marc21.bib_id, key, value, 'd')
date = date.rstrip(',')
dates = remove_trailing_punctuation(date).split('-')
try:
date_of_birth = dates[0].strip()
if date_of_birth:
agent['date_of_birth'] = date_of_birth
except Exception:
pass
try:
date_of_death = dates[1].strip()
if date_of_death:
agent['date_of_death'] = date_of_death
except Exception:
pass
if value.get('q'):
fuller_form_of_name = not_repetitive(
marc21.bib_id, key, value, 'q')
agent['fuller_form_of_name'] = remove_trailing_punctuation(
fuller_form_of_name
).lstrip('(').rstrip(')')

# 710|711 Organisation
elif key[:3] in ['710', '711']:
agent['type'] = 'bf:Organisation'
if key[:3] == '711':
agent['conference'] = True
else:
agent['conference'] = False
if value.get('b'):
subordinate_units = []
for subordinate_unit in utils.force_list(value.get('b')):
subordinate_units.append(subordinate_unit.rstrip('.'))
agent['subordinate_unit'] = subordinate_units
if value.get('e'):
subordinate_units = agent.get('subordinate_unit', [])
for subordinate_unit in utils.force_list(value.get('e')):
subordinate_units.append(subordinate_unit.rstrip('.'))
agent['subordinate_unit'] = subordinate_units
if value.get('n'):
conference_number = not_repetitive(marc21.bib_id, key,
value, 'n')
agent['conference_number'] = remove_trailing_punctuation(
conference_number
).lstrip('(').rstrip(')')
if value.get('d'):
data = not_repetitive(marc21.bib_id, key, value, 'd')
author['date'] = remove_trailing_punctuation(data)
return author
conference_date = not_repetitive(marc21.bib_id, key,
value, 'd')
agent['conference_date'] = remove_trailing_punctuation(
conference_date
).lstrip('(').rstrip(')')
if value.get('c'):
conference_place = not_repetitive(marc21.bib_id, key,
value, 'c')
agent['conference_place'] = remove_trailing_punctuation(
conference_place
).lstrip('(').rstrip(')')
roles = ['aut']
if value.get('4'):
roles = []
for role in utils.force_list(value.get('4')):
roles.append(role)
else:
if key[:3] == '100':
roles = ['cre']
elif key[:3] == '711':
roles = ['aut']
else:
roles = ['ctb']
return {
'agent': agent,
'role': roles
}
else:
return None

Expand Down
Loading

0 comments on commit aa5e1fe

Please sign in to comment.