Skip to content

Commit

Permalink
persons: link persons to source instead of MEF
Browse files Browse the repository at this point in the history
* Links documents to the source authority file, such as GND or IdRef,
  through the MEF server, instead of the MEF clusterized record. This
  is needed to provide a stable link to the authority.

My PR depends on the following `rero-ils`'s PR(s):

* rero/rero-ils-ui#269

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep committed May 16, 2020
1 parent 875b8cc commit 77745da
Show file tree
Hide file tree
Showing 19 changed files with 1,821 additions and 1,725 deletions.
2,636 changes: 1,318 additions & 1,318 deletions data/documents_big.json

Large diffs are not rendered by default.

624 changes: 312 additions & 312 deletions data/documents_small.json

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,12 +1577,12 @@ def _(x):

#: Persons
RERO_ILS_PERSONS_MEF_SCHEMA = 'persons/person-v0.0.1.json'
RERO_ILS_PERSONS_SOURCES = ['rero', 'bnf', 'gnd', 'idref']
RERO_ILS_PERSONS_SOURCES = ['idref', 'gnd', 'rero', 'bnf']

RERO_ILS_PERSONS_LABEL_ORDER = {
'fallback': 'fr',
'fr': ['rero', 'idref', 'bnf', 'gnd'],
'de': ['gnd', 'rero', 'idref', 'bnf'],
'fr': ['idref', 'rero', 'bnf', 'gnd'],
'de': ['gnd', 'idref', 'rero', 'bnf'],
}

#: Invenio circulation configuration.
Expand Down
20 changes: 16 additions & 4 deletions rero_ils/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,11 @@ def dumps(self, **kwargs):
def index_persons(self, bulk=False):
"""Index all attached persons."""
persons_ids = []
for author in self.replace_refs().get('authors', []):
auth_pid = author.get('pid')
if auth_pid:
for author in self.get('authors', []):
ref = author.get('$ref')
if ref:
from ..persons.api import Person
person = Person.get_record_by_mef_pid(auth_pid)
person = Person.get_record_by_ref(ref)
if bulk:
persons_ids.append(person.id)
else:
Expand All @@ -203,6 +203,18 @@ def get_all_serial_pids(cls):
for es_document in es_documents:
yield es_document.pid

def replace_refs(self):
"""Replace $ref with real data."""
from ..persons.api import Person
authors = self.get('authors', [])
for idx, author in enumerate(authors):
ref = author.get('$ref')
if ref:
pers = Person.get_record_by_ref(ref)
if pers:
authors[idx] = pers
return super(Document, self).replace_refs()


class DocumentsIndexer(IlsRecordsIndexer):
"""Indexing documents in Elasticsearch."""
Expand Down
37 changes: 30 additions & 7 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,51 @@ def get_person_link(bibid, id, key, value):
"""Get MEF person link."""
# https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
prod_host = 'mef.rero.ch'
test_host = 'mef.test.rero.ch'
mef_url = 'https://{host}/api/mef/'.format(host=test_host)
if os.environ.get('RERO_ILS_MEF_URL'):
mef_url = os.environ.get('RERO_ILS_MEF_URL')
active_host = 'mef.rero.ch'
mef_url = 'https://{host}/api/'.format(host=active_host)
# If the active host is not 'mef.rero.ch' please change the
# RERO_ILS_MEF_URL environment variable
env_mef_url = os.environ.get('RERO_ILS_MEF_URL')
if env_mef_url:
# We have to get rid of the last /mef in the environment url
mef_url = env_mef_url.replace('/api/mef', '/api')
mef_link = None
try:
identifier = id[1:].split(')')
url = "{mef}/?q={org}.pid:{pid}".format(
url = "{mef}mef/?q={org}.pid:{pid}".format(
mef=mef_url,
org=identifier[0].lower(),
pid=identifier[1]
)
request = requests.get(url=url)
if request.status_code == requests.codes.ok:
pid = None
data = request.json()
hits = data.get('hits', {}).get('hits')
if hits:
mef_link = hits[0].get('links').get('self')
mef_link = mef_link.replace(test_host, prod_host)
idref = hits[0].get('metadata', {}).get('idref')
gnd = hits[0].get('metadata', {}).get('gnd')
rero = hits[0].get('metadata', {}).get('rero')
if idref:
pid_type = 'idref'
pid = idref['pid']
elif gnd:
pid_type = 'gnd'
pid = gnd['pid']
elif rero:
pid_type = 'rero'
pid = rero['pid']
if pid:
mef_link = "{url}{pid_type}/{pid}".format(
url=mef_url,
pid_type=pid_type,
pid=pid
)
mef_link = mef_link.replace(active_host, prod_host)
else:
error_print('ERROR MEF REQUEST:', bibid, url,
request.status_code)

except Exception as err:
error_print('WARNING NOT MEF REF:', bibid, id, key, value)
return mef_link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,9 @@
}
},
"$ref": {
"title": "MEF person reference",
"title": "person reference",
"type": "string",
"pattern": "^https://mef.rero.ch/api/mef/.*?$",
"pattern": "^https://mef.rero.ch/api/bnf|gnd|idref|rero/.*?$",
"form": {
"wrappers": [
"ref"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,9 @@
}
},
"$ref": {
"title": "MEF person reference",
"title": "person reference",
"type": "string",
"pattern": "^https://mef.rero.ch/api/mef/.*?$",
"pattern": "^https://mef.rero.ch/api/bnf|gnd|idref|rero/.*?$",
"form": {
"wrappers": [
"ref"
Expand Down
5 changes: 2 additions & 3 deletions rero_ils/modules/documents/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,9 @@ def enrich_document_data(sender, json=None, record=None, index=None,
# MEF person ES index update
authors = []
for author in json.get('authors', []):
pid = author.get('pid', None)
pid = author.get('pid')
if pid:
# Check presence in DB
person = Person.get_record_by_mef_pid(pid)
person = Person.get_record_by_pid(pid)
if person:
author = person.dumps_for_document()
authors.append(author)
Expand Down
12 changes: 6 additions & 6 deletions rero_ils/modules/documents/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def preprocess_record(self, pid, record, links_factory=None, **kwargs):
rec['ui_title_variants'] = variant_titles
if request and request.args.get('resolve') == '1':
rec = record.replace_refs()
authors = rec.get('authors', [])
for idx, author in enumerate(authors):
pid_value = author.get('pid')
if pid_value:
person = Person.get_record_by_mef_pid(pid_value)
authors = []
for author in rec.get('authors', []):
pers_pid = author.get('pid')
if pers_pid:
person = Person.get_record_by_pid(pers_pid)
if person:
authors[idx] = person.dumps_for_document()
authors.append(person.dumps_for_document())
data = super(JSONSerializer, self).preprocess_record(
pid=pid, record=rec, links_factory=links_factory, kwargs=kwargs)

Expand Down
1 change: 0 additions & 1 deletion rero_ils/modules/notifications/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,6 @@ def process_notifications(cls, verbose=False):
try:
pid = payload['pid']
notification = Notification.get_record_by_pid(pid)
print('----process_notifications----:', notification)
Dispatcher().dispatch_notification(notification, verbose)
message.ack()
count['send'] += 1
Expand Down
94 changes: 61 additions & 33 deletions rero_ils/modules/persons/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,56 +64,84 @@ class Person(IlsRecord):
model_cls = PersonMetadata

@classmethod
def get_record_by_mef_pid(cls, pid):
"""Get record using MEF REST API."""
try:
db.session.begin_nested()
rec = cls.get_record_by_pid(pid)
if not rec:
# No data found: request on MEF URL
data = cls._get_mef_record(pid)
def get_record_by_ref(cls, ref):
"""Get a record from DB.
If the record dos not exist get it from MEF and creat it.
"""
pers = None
ref_split = ref.split('/')
ref_type = ref_split[-2]
ref_pid = ref_split[-1]
db.session.begin_nested()
if ref_type == 'mef':
pers = cls.get_record_by_pid(ref_pid)
else:
if ref_type == 'viaf':
result = PersonsSearch().filter(
'term', viaf_pid=ref_pid
).source('pid').scan()
else:
result = PersonsSearch().filter(
{'term': {'{type}.pid'.format(type=ref_type): ref_pid}}
).source('pid').scan()
try:
pid = next(result).pid
pers = cls.get_record_by_pid(pid)
except StopIteration:
pass
if not pers:
# We dit not find the record in DB get it from MEF and create it.
try:
data = cls._get_mef_data_by_type(ref_pid, ref_type)
metadata = data['metadata']
# Register MEF person
metadata = data.get('metadata')
if '$schema' in metadata:
del metadata['$schema']
# we have to commit because create
# uses db.session.begin_nested
rec = cls.create(metadata, dbcommit=True)
except Exception as err:
db.session.rollback()
current_app.logger.error('Get MEF record: {pid}'.format(
pid=pid
))
current_app.logger.error(err)

return None
pers = cls.create(metadata, dbcommit=True)
except Exception as err:
db.session.rollback()
current_app.logger.error('Get MEF record: {type}:{pid}'.format(
type=ref_type,
pid=ref_pid
))
current_app.logger.error(err)
return None
db.session.commit()
rec.reindex()
return rec
if pers:
pers.reindex()
return pers

def dumps_for_document(self):
"""Transform the record into document author format."""
return self._get_author_for_document()

@classmethod
def _get_mef_record(cls, pid):
def _get_mef_data_by_type(cls, pid, pid_type):
"""Request MEF REST API in JSON format."""
url = "{url}{pid}".format(
url=current_app.config.get('RERO_ILS_MEF_URL'),
pid=pid)
request = requests_get(
url=url,
params=dict(
resolve=1,
sources=1
))
url = current_app.config.get('RERO_ILS_MEF_URL')
if pid_type == 'mef':
mef_url = "{url}?q=pid:{pid}".format(url=url, pid=pid)
else:
if pid_type == 'viaf':
mef_url = "{url}?q=viaf_pid:{pid}".format(url=url, pid=pid)
else:
mef_url = "{url}?q={type}.pid:{pid}".format(
url=url,
type=pid_type,
pid=pid
)
request = requests_get(url=mef_url, params=dict(resolve=1, sources=1))
if request.status_code == requests_codes.ok:
return request.json()
data = request.json().get('hits', {}).get('hits', [None])
return data[0]
else:
current_app.logger.error(
'Mef resolver no metadata: {result} {url}'.format(
result=request.json(),
url=url
url=mef_url
)
)
raise Exception('unable to resolve')
Expand Down Expand Up @@ -195,6 +223,6 @@ def organisation_pids(self):


class PersonsIndexer(IlsRecordsIndexer):
"""Holdings indexing class."""
"""Person indexing class."""

record_cls = Person
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#}


{% for agency in ["rero", "gnd", "idref", "bnf"] %}
{% for agency in ["idref", "gnd", "rero", "bnf"] %}
{% if record[agency] %}
<article class="card m-md-4 m-2">
<div id="online_access" class="card-header p-2">
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,13 @@ def app_config(app_config):
app_config['WIKI_CONTENT_DIR'] = help_test_dir
app_config['WIKI_UPLOAD_FOLDER'] = join(help_test_dir, 'files')
return app_config
# mef_url = os.environ.get('RERO_ILS_MEF_URL')
# if mef_url:
# del os.environ['RERO_ILS_MEF_URL']
# yield app_config
# if mef_url:
# os.environ['RERO_ILS_MEF_URL'] = mef_url



@pytest.fixture(scope='module')
Expand Down
2 changes: 1 addition & 1 deletion tests/data/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -1244,7 +1244,7 @@
"authors": [
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/pers1"
"$ref": "https://mef.rero.ch/api/rero/A017671081"
}
],
"title": [
Expand Down
4 changes: 2 additions & 2 deletions tests/data/documents.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
"authors": [
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/20109313"
"$ref": "https://mef.rero.ch/api/idref/20109313"
},
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/25552024"
"$ref": "https://mef.rero.ch/api/gnd/25552024"
}
],
"title": "Le due tensioni : appunti per una ideologia della letteratura",
Expand Down
10 changes: 8 additions & 2 deletions tests/fixtures/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,14 @@ def person_data_tmp(data):
def person_response_data(person_data):
"""Load mef person response data."""
json_data = {
'id': person_data['pid'],
'metadata': person_data
'hits': {
'hits': [
{
'id': person_data['pid'],
'metadata': person_data
}
]
}
}
return json_data

Expand Down
5 changes: 4 additions & 1 deletion tests/ui/documents/test_documents_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

"""Document mapping tests."""

from copy import deepcopy

from elasticsearch_dsl.query import MultiMatch
from utils import get_mapping

Expand All @@ -29,8 +31,9 @@ def test_document_es_mapping(es, db, org_martigny,
search = DocumentsSearch()
mapping = get_mapping(search.Meta.index)
assert mapping
data = deepcopy(document_data_ref)
Document.create(
document_data_ref,
data,
dbcommit=True,
reindex=True,
delete_pid=True
Expand Down
Loading

0 comments on commit 77745da

Please sign in to comment.