Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

US1274: Import from BnF #1076

Merged
merged 2 commits into from
Jul 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 83 additions & 10 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ def _(x):
CELERY_BEAT_SCHEDULER = 'rero_ils.schedulers.RedisScheduler'
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379/4'

#: Redis URL (database 5 on localhost) configured for the import cache.
RERO_IMPORT_CACHE = 'redis://localhost:6379/5'
#: Expiration applied to the import cache.
#: NOTE(review): the unit is not visible here (presumably seconds) - confirm
#: against the code that reads this setting.
RERO_IMPORT_CACHE_EXPIRE = 10

# Database
# ========
#: Database URI including user and password
Expand Down Expand Up @@ -391,6 +394,9 @@ def _(x):
RECORDS_REST_DEFAULT_DELETE_PERMISSION_FACTORY = librarian_delete_permission_factory
"""Default delete permission factory: reject any request."""

REST_MIMETYPE_QUERY_ARG_NAME = 'format'
"""Name of the query argument to specify the mimetype wanted for the output."""

RECORDS_REST_ENDPOINTS = dict(
doc=dict(
pid_type='doc',
Expand All @@ -408,6 +414,10 @@ def _(x):
'rero_ils.modules.documents.serializers:json_doc_response'
)
},
record_serializers_aliases={
'json': 'application/json',
'rero+json': 'application/rero+json'
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -450,6 +460,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -484,6 +497,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -517,6 +533,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -551,6 +570,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -587,6 +609,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -625,6 +650,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -662,6 +690,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -697,6 +728,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -733,6 +767,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -767,6 +804,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -800,6 +840,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -834,6 +877,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -869,6 +915,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -903,6 +952,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -937,6 +989,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -976,6 +1031,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1010,6 +1068,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1049,6 +1110,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1086,6 +1150,9 @@ def _(x):
'rero_ils.modules.serializers:json_v1_response'
)
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': (
'rero_ils.modules.serializers:json_v1_search'
Expand Down Expand Up @@ -1161,10 +1228,12 @@ def _(x):
size=DOCUMENTS_AGGREGATION_SIZE)
),
organisation=dict(
terms=dict(field='holdings.organisation.organisation_pid', size=DOCUMENTS_AGGREGATION_SIZE),
terms=dict(field='holdings.organisation.organisation_pid',
size=DOCUMENTS_AGGREGATION_SIZE),
aggs=dict(
library=dict(
terms=dict(field='holdings.organisation.library_pid', size=DOCUMENTS_AGGREGATION_SIZE)
terms=dict(field='holdings.organisation.library_pid',
size=DOCUMENTS_AGGREGATION_SIZE)
)
)
),
Expand Down Expand Up @@ -1536,7 +1605,6 @@ def _(x):
RERO_ILS_APP_URL = 'https://ils.rero.ch'

RERO_ILS_PERMALINK_RERO_URL = 'http://data.rero.ch/01-{identifier}'
RERO_ILS_PERMALINK_BNF_URL = 'http://catalogue.bnf.fr/ark:/12148/{identifier}'

#: Git commit hash. If set, a link to github commit page
#: is displayed on RERO-ILS frontpage.
Expand All @@ -1549,13 +1617,6 @@ def _(x):
RERO_ILS_MEF_URL = 'https://{host}/api/mef/'.format(host='mef.rero.ch')
RERO_ILS_MEF_RESULT_SIZE = 100


#: RERO_ILS specific configurations.
RERO_ILS_APP_IMPORT_BNF_EAN = 'http://catalogue.bnf.fr/api/SRU?'\
'version=1.2&operation=searchRetrieve'\
'&recordSchema=unimarcxchange&maximumRecords=1'\
'&startRecord=1&query=bib.ean all "{}"'

RERO_ILS_APP_HELP_PAGE = (
'https://github.com/rero/rero-ils/wiki/Public-demo-help'
)
Expand Down Expand Up @@ -1615,6 +1676,9 @@ def _(x):
'application/json': ('invenio_records_rest.serializers'
':json_v1_response'),
},
record_serializers_aliases={
'json': 'application/json',
},
search_serializers={
'application/json': ('invenio_records_rest.serializers'
':json_v1_search'),
Expand Down Expand Up @@ -1734,3 +1798,12 @@ def _(x):
'extra',
'markdown_captions'
))

# IMPORT
# ======
#: Import REST endpoints, keyed by endpoint name. Each entry holds the
#: import class as a 'module.path:ClassName' string and an import size.
RERO_IMPORT_REST_ENDPOINTS = {
    'bnf': {
        'import_class': 'rero_ils.modules.imports.api:BnfImport',
        'import_size': 50,
    },
}
93 changes: 0 additions & 93 deletions rero_ils/modules/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import polib
import pycountry
import requests
import six
import xmltodict
import yaml
from babel import Locale, core
Expand Down Expand Up @@ -67,7 +66,6 @@
from .patrons.cli import import_users
from .tasks import process_bulk_queue
from .utils import read_json_record
from ..modules.documents.dojson.contrib.unimarctojson import unimarc
from ..modules.providers import append_fixtures_new_identifiers

_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)
Expand Down Expand Up @@ -1377,97 +1375,6 @@ def dump_es_mappings(verbose, outfile):
outfile.write('\n')


def response_test(count, response, exit, verbose):
    """Test the records in the response.

    Every record found in the XML body of ``response`` is converted with
    ``unimarc.do``. A conversion error is reported with its traceback and
    the JSON data that triggered it.

    :param count: number of records already processed before this call.
    :param response: HTTP response whose ``content`` holds the XML data.
    :param exit: if true, terminate the process with status 1 on the first
        conversion error.
    :param verbose: if true, print one line per record with its
        ``identifiedBy`` value, otherwise print an in-place counter.
    :returns: the record count after processing this response.
    """
    # read the xml data from the HTTP response into an in-memory file
    xml_file = six.BytesIO()
    xml_file.write(response.content)
    xml_file.seek(0)

    for xml_record in split_stream(xml_file):
        count += 1
        json_data = create_record(xml_record)
        # Reset for each record: when the conversion fails and we do not
        # exit, the verbose output must not reuse the previous record (or
        # hit an unbound name on the very first record).
        record = None
        try:
            record = unimarc.do(json_data)
        except Exception as err:
            click.secho('ERROR: {err}'.format(err=err), fg='red')
            traceback.print_exc()
            click.echo(json.dumps(json_data, indent=2))
            if exit:
                sys.exit(1)

        if verbose:
            info = record.get('identifiedBy') if record is not None else None
            click.echo('{count:10d}\t{info}'.format(
                count=count,
                info=info
            ))
        else:
            # print the counter, then backspace over it so the next value
            # overwrites it on the same line
            click.echo('{count:10d}'.format(count=count), nl=False)
            sys.stdout.flush()
            click.echo('{:\b<10}'.format(''), nl=False)

    return count


# NOTE: click uses the function docstring as the command help text, so the
# docstring is kept short and the details live in comments.
@utils.command('bnf_import_test')
@click.option('-S', '--search', default='',
              help='Search term.')
@click.option('-i', '--index', default='bib.recordid',
              help='SRU index: https://catalogue.bnf.fr/api/test.do '
                   'default: bib.recordid')
@click.option('-e', '--exit', default=False, is_flag=True,
              help='Exit on error.')
@click.option('-v', '--verbose', default=False, is_flag=True,
              help='Verbose output.')
@click.option('-s', '--start', default=1,
              help='Start at record to search.')
@click.option('-c', '--chunk_size', default=100,
              help='How many records to fetch in every request.')
def bnf_import_test(search, index, exit, verbose, start, chunk_size):
    """BNF import tests."""
    # URL template used to page through all validated records.
    bnf_url_all = 'https://catalogue.bnf.fr/api/SRU?version=1.2'\
        '&operation=searchRetrieve&query=bib.status all "validated"'\
        '&recordSchema=unimarcxchange'\
        '&maximumRecords={size}&startRecord={start}'

    # URL template used to fetch a single record matching `index`/`search`.
    bnf_url_ean = 'http://catalogue.bnf.fr/api/SRU?'\
        'version=1.2&operation=searchRetrieve'\
        '&recordSchema=unimarcxchange&maximumRecords=1'\
        '&startRecord=1&query={index} all "{search}"'

    count = 0
    if search:
        click.secho('Run BNF import test: {index} {search}'.format(
            index=index,
            search=search
        ), fg='green')
        with requests.get(bnf_url_ean.format(
            index=index,
            search=search
        )) as response:
            if not response.ok:
                sys.exit(0)
            count = response_test(count=count, response=response,
                                  exit=exit, verbose=verbose)
    else:
        click.secho('Run BNF import tests:', fg='green')
        while True:
            with requests.get(bnf_url_all.format(
                size=chunk_size,
                start=start
            )) as response:
                if not response.ok:
                    sys.exit(0)
                processed_before = count
                count = response_test(count=count, response=response,
                                      exit=exit, verbose=verbose)
            # A successful response without any record means there is
            # nothing left to fetch; without this check the loop would
            # keep requesting empty pages forever.
            if count == processed_before:
                break
            start += chunk_size
start += chunk_size


@utils.command('export')
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
@click.option('-p', '--pid_type', 'pid_type', default='doc')
Expand Down
Loading