Skip to content

Commit

Permalink
persons: link persons to source instead of MEF
Browse files Browse the repository at this point in the history
* Links documents to the source authority file, such as GND or IdRef,
  through the MEF server, instead of the MEF clusterized record. This
  is needed to provide a stable link to the authority.

My PR depends on the following `rero-ils`'s PR(s):

* rero/rero-ils-ui#269

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep committed May 14, 2020
1 parent edf5070 commit 2fbb8c3
Show file tree
Hide file tree
Showing 16 changed files with 1,803 additions and 1,720 deletions.
2,636 changes: 1,318 additions & 1,318 deletions data/documents_big.json

Large diffs are not rendered by default.

624 changes: 312 additions & 312 deletions data/documents_small.json

Large diffs are not rendered by default.

20 changes: 16 additions & 4 deletions rero_ils/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,11 @@ def dumps(self, **kwargs):
def index_persons(self, bulk=False):
"""Index all attached persons."""
persons_ids = []
for author in self.replace_refs().get('authors', []):
auth_pid = author.get('pid')
if auth_pid:
for author in self.get('authors', []):
ref = author.get('$ref')
if ref:
from ..persons.api import Person
person = Person.get_record_by_mef_pid(auth_pid)
person = Person.get_record_by_ref(ref)
if bulk:
persons_ids.append(person.id)
else:
Expand All @@ -203,6 +203,18 @@ def get_all_serial_pids(cls):
for es_document in es_documents:
yield es_document.pid

def replace_refs(self):
"""Replace $ref with real data."""
from ..persons.api import Person
authors = self.get('authors', [])
for idx, author in enumerate(authors):
ref = author.get('$ref')
if ref:
pers = Person.get_record_by_ref(ref)
if pers:
authors[idx] = pers
return super(Document, self).replace_refs()


class DocumentsIndexer(IlsRecordsIndexer):
"""Indexing documents in Elasticsearch."""
Expand Down
29 changes: 23 additions & 6 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,45 @@ def get_person_link(bibid, id, key, value):
"""Get MEF person link."""
# https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
prod_host = 'mef.rero.ch'
test_host = 'mef.test.rero.ch'
mef_url = 'https://{host}/api/mef/'.format(host=test_host)
if os.environ.get('RERO_ILS_MEF_URL'):
mef_url = os.environ.get('RERO_ILS_MEF_URL')
test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
mef_url = 'https://{host}/api/'.format(host=test_host)
mef_link = None
try:
identifier = id[1:].split(')')
url = "{mef}/?q={org}.pid:{pid}".format(
url = "{mef}mef/?q={org}.pid:{pid}".format(
mef=mef_url,
org=identifier[0].lower(),
pid=identifier[1]
)
request = requests.get(url=url)
if request.status_code == requests.codes.ok:
pid = None
data = request.json()
hits = data.get('hits', {}).get('hits')
if hits:
mef_link = hits[0].get('links').get('self')
idref = hits[0].get('metadata', {}).get('idref')
gnd = hits[0].get('metadata', {}).get('gnd')
rero = hits[0].get('metadata', {}).get('rero')
if idref:
pid_type = 'idref'
pid = idref['pid']
elif gnd:
pid_type = 'gnd'
pid = gnd['pid']
elif rero:
pid_type = 'rero'
pid = rero['pid']
if pid:
mef_link = "{url}{pid_type}/{pid}".format(
url=mef_url,
pid_type=pid_type,
pid=pid
)
mef_link = mef_link.replace(test_host, prod_host)
else:
error_print('ERROR MEF REQUEST:', bibid, url,
request.status_code)

except Exception as err:
error_print('WARNING NOT MEF REF:', bibid, id, key, value)
return mef_link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,9 @@
}
},
"$ref": {
"title": "MEF person reference",
"title": "person reference",
"type": "string",
"pattern": "^https://mef.rero.ch/api/mef/.*?$",
"pattern": "^https://mef.rero.ch/api/bnf|gnd|idref|rero/.*?$",
"form": {
"wrappers": [
"ref"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,9 @@
}
},
"$ref": {
"title": "MEF person reference",
"title": "person reference",
"type": "string",
"pattern": "^https://mef.rero.ch/api/mef/.*?$",
"pattern": "^https://mef.rero.ch/api/bnf|gnd|idref|rero/.*?$",
"form": {
"wrappers": [
"ref"
Expand Down
5 changes: 2 additions & 3 deletions rero_ils/modules/documents/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,9 @@ def enrich_document_data(sender, json=None, record=None, index=None,
# MEF person ES index update
authors = []
for author in json.get('authors', []):
pid = author.get('pid', None)
pid = author.get('pid')
if pid:
# Check presence in DB
person = Person.get_record_by_mef_pid(pid)
person = Person.get_record_by_pid(pid)
if person:
author = person.dumps_for_document()
authors.append(author)
Expand Down
12 changes: 6 additions & 6 deletions rero_ils/modules/documents/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def preprocess_record(self, pid, record, links_factory=None, **kwargs):
rec['ui_title_variants'] = variant_titles
if request and request.args.get('resolve') == '1':
rec = record.replace_refs()
authors = rec.get('authors', [])
for idx, author in enumerate(authors):
pid_value = author.get('pid')
if pid_value:
person = Person.get_record_by_mef_pid(pid_value)
authors = []
for author in rec.get('authors', []):
pers_pid = author.get('pid')
if pers_pid:
person = Person.get_record_by_pid(pers_pid)
if person:
authors[idx] = person.dumps_for_document()
authors.append(person.dumps_for_document())
data = super(JSONSerializer, self).preprocess_record(
pid=pid, record=rec, links_factory=links_factory, kwargs=kwargs)

Expand Down
1 change: 0 additions & 1 deletion rero_ils/modules/notifications/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,6 @@ def process_notifications(cls, verbose=False):
try:
pid = payload['pid']
notification = Notification.get_record_by_pid(pid)
print('----process_notifications----:', notification)
Dispatcher().dispatch_notification(notification, verbose)
message.ack()
count['send'] += 1
Expand Down
94 changes: 61 additions & 33 deletions rero_ils/modules/persons/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,56 +64,84 @@ class Person(IlsRecord):
model_cls = PersonMetadata

@classmethod
def get_record_by_mef_pid(cls, pid):
"""Get record using MEF REST API."""
try:
db.session.begin_nested()
rec = cls.get_record_by_pid(pid)
if not rec:
# No data found: request on MEF URL
data = cls._get_mef_record(pid)
def get_record_by_ref(cls, ref):
"""Get a record from DB.
If the record dos not exist get it from MEF and creat it.
"""
pers = None
ref_split = ref.split('/')
ref_type = ref_split[-2]
ref_pid = ref_split[-1]
db.session.begin_nested()
if ref_type == 'mef':
pers = cls.get_record_by_pid(ref_pid)
else:
if ref_type == 'viaf':
result = PersonsSearch().filter(
'term', viaf_pid=ref_pid
).source('pid').scan()
else:
result = PersonsSearch().filter(
{'term': {'{type}.pid'.format(type=ref_type): ref_pid}}
).source('pid').scan()
try:
pid = next(result).pid
pers = cls.get_record_by_pid(pid)
except StopIteration:
pass
if not pers:
# We dit not find the record in DB get it from MEF and create it.
try:
data = cls._get_mef_data_by_type(ref_pid, ref_type)
metadata = data['metadata']
# Register MEF person
metadata = data.get('metadata')
if '$schema' in metadata:
del metadata['$schema']
# we have to commit because create
# uses db.session.begin_nested
rec = cls.create(metadata, dbcommit=True)
except Exception as err:
db.session.rollback()
current_app.logger.error('Get MEF record: {pid}'.format(
pid=pid
))
current_app.logger.error(err)

return None
pers = cls.create(metadata, dbcommit=True)
except Exception as err:
db.session.rollback()
current_app.logger.error('Get MEF record: {type}:{pid}'.format(
type=ref_type,
pid=ref_pid
))
current_app.logger.error(err)
return None
db.session.commit()
rec.reindex()
return rec
if pers:
pers.reindex()
return pers

def dumps_for_document(self):
"""Transform the record into document author format."""
return self._get_author_for_document()

@classmethod
def _get_mef_record(cls, pid):
def _get_mef_data_by_type(cls, pid, pid_type):
"""Request MEF REST API in JSON format."""
url = "{url}{pid}".format(
url=current_app.config.get('RERO_ILS_MEF_URL'),
pid=pid)
request = requests_get(
url=url,
params=dict(
resolve=1,
sources=1
))
url = current_app.config.get('RERO_ILS_MEF_URL')
if pid_type == 'mef':
mef_url = "{url}?q=pid:{pid}".format(url=url, pid=pid)
else:
if pid_type == 'viaf':
mef_url = "{url}?q=viaf_pid:{pid}".format(url=url, pid=pid)
else:
mef_url = "{url}?q={type}.pid:{pid}".format(
url=url,
type=pid_type,
pid=pid
)
request = requests_get(url=mef_url, params=dict(resolve=1, sources=1))
if request.status_code == requests_codes.ok:
return request.json()
data = request.json().get('hits', {}).get('hits', [None])
return data[0]
else:
current_app.logger.error(
'Mef resolver no metadata: {result} {url}'.format(
result=request.json(),
url=url
url=mef_url
)
)
raise Exception('unable to resolve')
Expand Down Expand Up @@ -195,6 +223,6 @@ def organisation_pids(self):


class PersonsIndexer(IlsRecordsIndexer):
"""Holdings indexing class."""
"""Person indexing class."""

record_cls = Person
2 changes: 1 addition & 1 deletion tests/data/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -1244,7 +1244,7 @@
"authors": [
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/pers1"
"$ref": "https://mef.rero.ch/api/rero/A017671081"
}
],
"title": [
Expand Down
4 changes: 2 additions & 2 deletions tests/data/documents.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
"authors": [
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/20109313"
"$ref": "https://mef.rero.ch/api/idref/20109313"
},
{
"type": "person",
"$ref": "https://mef.rero.ch/api/mef/25552024"
"$ref": "https://mef.rero.ch/api/gnd/25552024"
}
],
"title": "Le due tensioni : appunti per una ideologia della letteratura",
Expand Down
10 changes: 8 additions & 2 deletions tests/fixtures/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,14 @@ def person_data_tmp(data):
def person_response_data(person_data):
"""Load mef person response data."""
json_data = {
'id': person_data['pid'],
'metadata': person_data
'hits': {
'hits': [
{
'id': person_data['pid'],
'metadata': person_data
}
]
}
}
return json_data

Expand Down
5 changes: 4 additions & 1 deletion tests/ui/documents/test_documents_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

"""Document mapping tests."""

from copy import deepcopy

from elasticsearch_dsl.query import MultiMatch
from utils import get_mapping

Expand All @@ -29,8 +31,9 @@ def test_document_es_mapping(es, db, org_martigny,
search = DocumentsSearch()
mapping = get_mapping(search.Meta.index)
assert mapping
data = deepcopy(document_data_ref)
Document.create(
document_data_ref,
data,
dbcommit=True,
reindex=True,
delete_pid=True
Expand Down
42 changes: 21 additions & 21 deletions tests/ui/persons/test_persons_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
from __future__ import absolute_import, print_function

import mock
import pytest
from utils import flush_index, mock_response

from rero_ils.modules.persons.api import Person, person_id_fetcher
from rero_ils.modules.persons.api import Person, PersonsSearch, \
person_id_fetcher


def test_person_create(app, person_data_tmp, caplog):
Expand Down Expand Up @@ -62,24 +63,23 @@ def test_person_create(app, person_data_tmp, caplog):
)


def test_person_mef_create(app, person_data_tmp):
@mock.patch('rero_ils.modules.persons.api.requests_get')
def test_person_mef_create(mock_persons_mef_get, app, person_data_tmp,
person_response_data):
"""Test MEF person creation."""
# with pytest.raises(Exception):
assert not Person.get_record_by_mef_pid('xxx')
with pytest.raises(Exception):
Person._get_mef_record('xxx')

count = Person.count()
with mock.patch(
'rero_ils.modules.persons.api.Person._get_mef_record',
mock.MagicMock(return_value={'metadata': person_data_tmp})
):
pers_mef = Person.get_record_by_mef_pid('pers1')
assert pers_mef == person_data_tmp
assert Person.count() == count + 1
pers_mef.pop('rero')
pers_mef.pop('gnd')
pers_mef['sources'] = ['bnf']
pers_mef.update(pers_mef, dbcommit=True)
pers_db = Person.get_record_by_mef_pid('pers1')
assert pers_db['sources'] == ['bnf']
mock_persons_mef_get.return_value = mock_response(
json_data=person_response_data
)
pers_mef = Person.get_record_by_ref(
'https://mef.rero.ch/api/rero/A017671081')
flush_index(PersonsSearch.Meta.index)
assert pers_mef == person_data_tmp
assert Person.count() == count + 1
pers_mef.pop('rero')
pers_mef.pop('gnd')
pers_mef['sources'] = ['bnf']
pers_mef.replace(pers_mef, dbcommit=True)
pers_db = Person.get_record_by_ref(
'https://mef.rero.ch/api/gnd/172759757')
assert pers_db['sources'] == ['bnf']
Loading

0 comments on commit 2fbb8c3

Please sign in to comment.