Skip to content

Commit

Permalink
persons: fixes removing persons records when document is suppressed
Browse files Browse the repository at this point in the history
* Fix indexer schema mapping for mef authorities
* Refactoring of document listeners using MefPerson class
* Closes rero#601

Co-Authored-by: Renaud Michotte <renaud.michotte@gmail.com>
  • Loading branch information
zannkukai committed Nov 15, 2019
1 parent 68b5a67 commit e6b2683
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 128 deletions.
6 changes: 5 additions & 1 deletion rero_ils/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ def can_edit(self):
# TODO: Make this condition on data
return not self.harvested

def get_linked_authors(self):
    """Get the ``$ref`` URIs of all authors linked to an authority.

    Only author entries carrying a non-empty ``$ref`` key are considered;
    entries without it are ignored.

    :return: a list of the ``$ref`` values found in the ``authors`` field,
        in their original order (empty if there are none).
    """
    # ``authors`` may be missing or explicitly set to None; treat both cases
    # as "no authors" instead of failing while iterating over None.
    authors = self.get('authors') or []
    return [author['$ref'] for author in authors if author.get('$ref')]

def get_number_of_items(self):
"""Get number of items for document."""
from ..items.api import ItemsSearch
Expand Down Expand Up @@ -138,6 +142,6 @@ def dumps(self, **kwargs):
provision_activity["_text"] = \
publication_statement_text(provision_activity)
series = dump.get('series')
for series_element in series:
for series_element in series or []:
series_element["_text"] = series_format_text(series_element)
return dump
132 changes: 43 additions & 89 deletions rero_ils/modules/documents/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,17 @@

"""Signals connector for Document."""

from flask import current_app
from invenio_indexer.api import RecordIndexer
from invenio_jsonschemas import current_jsonschemas
from invenio_search import current_search
from requests import codes as requests_codes
from requests import get as requests_get

from elasticsearch.exceptions import NotFoundError

from .views import create_publication_statement
from ..documents.api import DocumentsSearch
from ..holdings.api import Holding
from ..item_types.api import ItemType
from ..items.api import Item
from ..locations.api import Location
from ..mef_persons.api import MefPerson
from ..mef_persons.utils import resolve_mef_uri
from ..organisations.api import Organisation


Expand Down Expand Up @@ -128,91 +126,47 @@ def mef_person_revert(sender, *args, **kwargs):


def mef_person_update_index(sender, *args, **kwargs):
"""Index MEF person in ES."""
"""Index MEF person in ES.
When indexing a document, this function checks whether some authors are
authorities. For each authority, it checks whether the authority already
exists; if not, then the authority is created.
:param record: the document being processed
"""
record = kwargs['record']
if 'documents' in record.get('$schema', ''):
authors = record.get('authors', [])
for author in authors:
mef_url = author.get('$ref')
if mef_url:
mef_url = mef_url.replace(
'mef.rero.ch',
current_app.config['RERO_ILS_MEF_HOST']
)
request = requests_get(url=mef_url, params=dict(
resolve=1,
sources=1
))
if request.status_code == requests_codes.ok:
data = request.json()
id = data['id']
data = data.get('metadata')
if data:
data['id'] = id
data['$schema'] = current_jsonschemas.path_to_url(
current_app.config[
'RERO_ILS_PERSONS_MEF_SCHEMA'
]
)
indexer = RecordIndexer()
index, doc_type = indexer.record_to_index(data)
indexer.client.index(
id=id,
index=index,
doc_type=doc_type,
body=data,
)
current_search.flush_and_refresh(index)
else:
current_app.logger.error(
'Mef resolver request error: {stat} {url}'.format(
stat=request.status_code,
url=mef_url
)
)
raise Exception('unable to resolve')
if 'documents' not in record.get('$schema', ''):
return

refs = [a['$ref'] for a in record.get('authors', []) if a.get('$ref')]
for author_ref in refs:
data = resolve_mef_uri(author_ref)
if data and data.get('id'):
try:
person = MefPerson.get_record_by_pid(data['id'])
person.reindex()
except NotFoundError:
print("author_ref [{r}] not found, create it ...".format(r=author_ref))
person = MefPerson.create(data)
person.reindex()


def mef_person_delete(sender, *args, **kwargs):
"""Delete signal."""
"""Delete signal about a document
When deleting a document, we need to check whether some of its authors are
authorities. If one of these authorities has no other document linked to it,
then this authority must be deleted from the index.
:param record: the document being suppressed
"""
record = kwargs['record']
if 'documents' in record.get('$schema', ''):
authors = record.get('authors', [])
for author in authors:
mef_url = author.get('$ref')
if mef_url:
mef_url = mef_url.replace(
'mef.rero.ch',
current_app.config['RERO_ILS_MEF_HOST']
)
request = requests_get(url=mef_url, params=dict(
resolve=1,
sources=1
))
if request.status_code == requests_codes.ok:
data = request.json()
id = data['id']
data = data.get('metadata')
if data:
search = DocumentsSearch()
count = search.filter(
'match',
authors__pid=id
).execute().hits.total
if count == 1:
indexer = RecordIndexer()
index, doc_type = indexer.record_to_index(data)
indexer.client.delete(
id=id,
index=index,
doc_type=doc_type
)
current_search.flush_and_refresh(index)
else:
current_app.logger.error(
'Mef resolver request error: {result} {url}'.format(
result=request.status_code,
url=mef_url
)
)
raise Exception('unable to resolve')
if 'documents' not in record.get('$schema', ''):
return

for author_ref in record.get_linked_authors():
data = resolve_mef_uri(author_ref)
if data and data.get('id'):
person = MefPerson.get_record(data['id'])
if len(person.get_linked_document_pids()) == 1:
person.delete()
4 changes: 2 additions & 2 deletions rero_ils/modules/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from invenio_indexer.signals import before_record_index
from invenio_oaiharvester.signals import oaiharvest_finished
from invenio_records.signals import after_record_delete, after_record_insert, \
after_record_revert, after_record_update
after_record_revert, after_record_update, before_record_delete

from .documents.listener import enrich_document_data, mef_person_delete, \
mef_person_insert, mef_person_revert, mef_person_update
Expand Down Expand Up @@ -115,5 +115,5 @@ def register_signals(self):

after_record_insert.connect(mef_person_insert)
after_record_update.connect(mef_person_update)
after_record_delete.connect(mef_person_delete)
before_record_delete.connect(mef_person_delete)
after_record_revert.connect(mef_person_revert)
5 changes: 5 additions & 0 deletions rero_ils/modules/indexer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def record_to_index(record):
# put all document in the same index
if re.search(r'/documents/', schema):
schema = re.sub(r'-.*\.json', '.json', schema)
# authorities specific transformation
if re.search(r'/authorities/', schema):
schema = re.sub(r'/authorities/', '/persons/', schema)
schema = re.sub(r'mef-person', 'mef_person', schema)

index, doc_type = schema_to_index(schema, index_names=index_names)

if index and doc_type:
Expand Down
41 changes: 5 additions & 36 deletions rero_ils/modules/mef_persons/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,10 @@ class MefPerson(ElasticsearchRecord):
@classmethod
def get_record_by_pid(cls, pid):
"""Get elasticsearch record by pid value."""

#print(cls.get_documents_by_person_pid(pid))
pers = MefPerson(cls.get_record(pid))
return MefPerson(cls.get_record(pid))

def get_number_of_linked_documents(self, org_pid=None):
"""Get number of linked documents for person."""
def get_linked_document_pids(self, org_pid=None):
"""Get list of linked document pids for person."""
search = DocumentsSearch()
search = search.filter(
'term',
Expand All @@ -80,36 +77,8 @@ def get_number_of_linked_documents(self, org_pid=None):
search = search.filter(
'term', holdings__organisation__organisation_pid=org_pid
)
results = search.source().count()
return results
return [hit.pid for hit in search.scan()]

def get_linked_documents(self, org_pid=None):
"""Get linked documents."""
search = DocumentsSearch()
search = search.filter(
'term',
authors__pid=self.pid
)
if org_pid:
search = search.filter(
'term', holdings__organisation__organisation_pid=org_pid
)

return [result.get('_source') for result in search.execute().hits.hits]

@classmethod
def get_record_by_mef_uri(cls, uri):
"""Get elasticsearch record by mef uri."""
mef_url = uri.replace(
'mef.rero.ch',
current_app.config['RERO_ILS_MEF_HOST']
)
request = requests_get(url=mef_url, params=dict(
resolve=1,
sources=1
))
if request.status_code == requests_codes.ok:
data = request.json()
id = data['id']
return MefPerson(cls.get_record(id))

for pid in self.get_linked_document_pids():
yield Document.get_record_by_pid(pid)
22 changes: 22 additions & 0 deletions rero_ils/modules/mef_persons/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from flask import current_app
from requests import codes as requests_codes
from requests import get as requests_get


def resolve_mef_uri(uri):
    """Resolve a MEF URI and return the associated data.

    The ``mef.rero.ch`` host found in the URI is replaced by the host
    configured in ``RERO_ILS_MEF_HOST``, then the resource is requested with
    the ``resolve`` and ``sources`` query parameters set to 1.

    :param uri: the MEF URI to resolve.
    :return: the decoded JSON payload as a dictionary; ``None`` if the
        request did not return an OK status, if the body is not valid JSON,
        or if the payload is not an object carrying a non-empty ``id``.
    """
    mef_url = uri.replace(
        'mef.rero.ch',
        current_app.config['RERO_ILS_MEF_HOST']
    )
    # NOTE(review): no timeout is passed to the request, so a stalled server
    # can block the caller indefinitely - consider adding one.
    response = requests_get(url=mef_url, params={'resolve': 1, 'sources': 1})
    if response.status_code != requests_codes.ok:
        return None
    try:
        data = response.json()
    except ValueError:
        # A body that is not valid JSON counts as a failed resolution, as
        # promised by the documented contract, instead of propagating.
        return None
    # Guard against payloads that are valid JSON but not an object
    # (e.g. a list), for which ``.get`` would not exist.
    if isinstance(data, dict) and data.get('id'):
        return data
    return None

0 comments on commit e6b2683

Please sign in to comment.