Skip to content

Commit

Permalink
data: add a class to import external records
Browse files Browse the repository at this point in the history
This class helps to get records from external resources like BNF.
It is possible to search for records and transform them into rero-ils
json format without saving them to the database. The search result can
be displayed and filtered. It is also possible to display
the source data of a record and import the data into the database.

* Adds general class for imports.
* Adds BNF specific import class.
* Saves the search result in REDIS
  `RERO_IMPORT_CACHE` = 'redis://localhost:6379/5'
  with an expiration time of `RERO_IMPORT_CACHE_EXPIRE` = 10 minutes.
* Adds argument `format` to the rest endpoints. Output mimetypes
  can now be specified by argument or in the header of the request.

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
2 people authored and iGor milhit committed Jul 2, 2020
1 parent e71e3d4 commit f01ceff
Show file tree
Hide file tree
Showing 10 changed files with 905 additions and 103 deletions.
93 changes: 83 additions & 10 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ def _(x):
CELERY_BEAT_SCHEDULER = 'rero_ils.schedulers.RedisScheduler'
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379/4'

#: Redis URL of the cache holding the results of external import searches.
RERO_IMPORT_CACHE = 'redis://localhost:6379/5'
#: Expiration time of the cached import search results.
#: NOTE(review): presumably expressed in minutes -- confirm where it is used.
RERO_IMPORT_CACHE_EXPIRE = 10

# Database
# ========
#: Database URI including user and password
Expand Down Expand Up @@ -391,6 +394,9 @@ def _(x):
RECORDS_REST_DEFAULT_DELETE_PERMISSION_FACTORY = librarian_delete_permission_factory
"""Default delete permission factory: reject any request."""

REST_MIMETYPE_QUERY_ARG_NAME = 'format'
"""Name of the query argument to specify the mimetype wanted for the output."""

RECORDS_REST_ENDPOINTS = dict(
doc=dict(
pid_type='doc',
Expand All @@ -408,6 +414,10 @@ def _(x):
'rero_ils.modules.documents.serializers:json_doc_response'
)
},
record_serializers_aliases={
'json': 'application/json',
'rero+json': 'application/rero+json'
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -450,6 +460,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -484,6 +497,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -517,6 +533,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -551,6 +570,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -587,6 +609,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -625,6 +650,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -662,6 +690,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -697,6 +728,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -733,6 +767,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -767,6 +804,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -800,6 +840,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -834,6 +877,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -869,6 +915,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -903,6 +952,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -937,6 +989,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -976,6 +1031,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1010,6 +1068,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1049,6 +1110,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1086,6 +1150,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1161,10 +1228,12 @@ def _(x):
size=DOCUMENTS_AGGREGATION_SIZE)
),
organisation=dict(
terms=dict(field='holdings.organisation.organisation_pid', size=DOCUMENTS_AGGREGATION_SIZE),
terms=dict(field='holdings.organisation.organisation_pid',
size=DOCUMENTS_AGGREGATION_SIZE),
aggs=dict(
library=dict(
terms=dict(field='holdings.organisation.library_pid', size=DOCUMENTS_AGGREGATION_SIZE)
terms=dict(field='holdings.organisation.library_pid',
size=DOCUMENTS_AGGREGATION_SIZE)
)
)
),
Expand Down Expand Up @@ -1542,7 +1611,6 @@ def _(x):
RERO_ILS_APP_URL = 'https://ils.rero.ch'

RERO_ILS_PERMALINK_RERO_URL = 'http://data.rero.ch/01-{identifier}'
RERO_ILS_PERMALINK_BNF_URL = 'http://catalogue.bnf.fr/ark:/12148/{identifier}'

#: Git commit hash. If set, a link to github commit page
#: is displayed on RERO-ILS frontpage.
Expand All @@ -1555,13 +1623,6 @@ def _(x):
RERO_ILS_MEF_URL = 'https://{host}/api/mef/'.format(host='mef.rero.ch')
RERO_ILS_MEF_RESULT_SIZE = 100


#: RERO_ILS specific configurations.
RERO_ILS_APP_IMPORT_BNF_EAN = 'http://catalogue.bnf.fr/api/SRU?'\
'version=1.2&operation=searchRetrieve'\
'&recordSchema=unimarcxchange&maximumRecords=1'\
'&startRecord=1&query=bib.ean all "{}"'

RERO_ILS_APP_HELP_PAGE = (
'https://github.com/rero/rero-ils/wiki/Public-demo-help'
)
Expand Down Expand Up @@ -1621,6 +1682,9 @@ def _(x):
'application/json': ('invenio_records_rest.serializers'
':json_v1_response'),
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': ('invenio_records_rest.serializers'
':json_v1_search'),
Expand Down Expand Up @@ -1740,3 +1804,12 @@ def _(x):
'extra',
'markdown_captions'
))

# IMPORT
# ====
#: Import endpoints, keyed by the name of the external source.
#: Each entry gives the import class as a 'module.path:ClassName' string
#: together with an `import_size`.
#: NOTE(review): `import_size` is presumably the maximum number of records
#: requested per search -- confirm against the import class.
RERO_IMPORT_REST_ENDPOINTS = dict(
bnf=dict(
import_class='rero_ils.modules.imports.api:BnfImport',
import_size=50
)
)
80 changes: 0 additions & 80 deletions rero_ils/modules/documents/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,13 @@
from functools import wraps

import requests
import six
from dojson.contrib.marc21.utils import create_record, split_stream
from flask import Blueprint, abort, current_app, jsonify, render_template
from flask import request as flask_request
from flask_babelex import gettext as _
from flask_login import current_user
from invenio_records_ui.signals import record_viewed

from .api import Document
from .dojson.contrib.unimarctojson import unimarc
from .utils import display_alternate_graphic_first, edition_format_text, \
localized_data_name, publication_statement_text, series_format_text, \
title_format_text_alternate_graphic, title_format_text_head, \
Expand Down Expand Up @@ -108,83 +105,6 @@ def cover(isbn):
return jsonify(json.loads(response.text[len('thumb('):-1]))


def _bnf_error_response(status_code, title, detail=None):
    """Build the JSON error payload returned by the BNF import endpoint.

    :param status_code: HTTP status code reported in the payload.
    :param title: short description of the error.
    :param detail: optional additional information; the ``detail`` key is
        omitted from the payload when this is ``None``.
    :returns: dict with an empty ``metadata`` and an ``errors`` entry.
    """
    errors = {'code': status_code, 'title': title}
    if detail is not None:
        errors['detail'] = detail
    return {'metadata': {}, 'errors': errors}


def _log_bnf_error(payload):
    """Log the title and detail of an error payload.

    The values are read from ``payload['errors']``; reading them from the
    top level of the payload would always log ``None: None``.
    """
    errors = payload['errors']
    current_app.logger.error(
        '{title}: {detail}'.format(
            title=errors.get('title'),
            detail=errors.get('detail')))


@api_blueprint.route("/import/bnf/<int:ean>")
@check_permission
def import_bnf_ean(ean):
    """Import a record from the BNF given an ISBN 13 without dashes.

    See: https://catalogue.bnf.fr/api/test.do

    :param ean: EAN / ISBN 13 taken from the URL as an integer.
    :returns: tuple of the JSON response and the HTTP status code:
        200 with the converted record in ``metadata``,
        404 when the BNF answer contains no record,
        502 when the BNF server answers with a bad status code,
        500 for any other error.
    """
    bnf_url = current_app.config['RERO_ILS_APP_IMPORT_BNF_EAN']
    try:
        with requests.get(bnf_url.format(ean)) as bnf_response:
            if not bnf_response.ok:
                status_code = 502
                response = _bnf_error_response(
                    status_code,
                    'The BNF server returns a bad status code.',
                    'Status code: {}'.format(bnf_response.status_code))
                _log_bnf_error(response)
            else:
                # wrap the xml data of the HTTP response in an in-memory file
                xml_file = six.BytesIO(bnf_response.content)

                # get the record in xml if exists
                # note: the request should return one record max;
                # an empty stream raises StopIteration (handled below)
                xml_record = next(split_stream(xml_file))

                # convert xml in marc json
                json_data = create_record(xml_record)

                # convert marc json to local json format
                record = unimarc.do(json_data)
                response = {
                    'metadata': record
                }
                status_code = 200
    # no record found!
    except StopIteration:
        status_code = 404
        response = _bnf_error_response(
            status_code,
            'The EAN was not found on the BNF server.')
    # other errors: this is the endpoint boundary, so report and log them
    except Exception as error:
        status_code = 500
        response = _bnf_error_response(
            status_code,
            'An unexpected error has been raise.',
            'Error: {error}'.format(error=error))
        _log_bnf_error(response)

    return jsonify(response), status_code


blueprint = Blueprint(
'documents',
__name__,
Expand Down
Loading

0 comments on commit f01ceff

Please sign in to comment.