Skip to content

Commit

Permalink
URN: redirect to the right URL
Browse files Browse the repository at this point in the history
- Registers a new URN with the organisation's server name as base URL.
- Fixes URN creation.
- Adds the DNB PATCH method to update the URLs of an already registered URN.
- Adds the get DNB method to get URN information.
- Marks a URN pid as deleted when a document is deleted.
- Fixes the query to retrieve all the unregistered URNs.
- Adds a new optional parameter to specify the data when a test document
  is created.
- Closes: rero#849.
- Closes: rero#811.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
  • Loading branch information
jma committed Jun 6, 2023
1 parent 959e091 commit 5c02320
Show file tree
Hide file tree
Showing 11 changed files with 2,179 additions and 2,258 deletions.
4,036 changes: 1,892 additions & 2,144 deletions poetry.lock

Large diffs are not rendered by default.

22 changes: 21 additions & 1 deletion sonar/modules/documents/cli/urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

"""URN specific CLI commands."""

import json
import os

import click
Expand All @@ -28,6 +29,7 @@
from invenio_pidstore.models import PersistentIdentifier

from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.dnb import DnbUrnService
from sonar.modules.documents.urn import Urn
from sonar.snl.ftp import SNLRepository

Expand All @@ -36,15 +38,33 @@
def urn():
"""URN specific commands."""

@urn.command()
@click.argument('urn')
@with_appcontext
def get(urn):
    """Get a registered URN information."""
    # The docstring above is also the help text click shows for the command.
    # Fetch what the DNB service knows about the URN, then pretty-print it
    # as JSON with sorted keys.
    urn_info = DnbUrnService.get(urn)
    rendered = json.dumps(urn_info, sort_keys=True, indent=4)
    click.echo(rendered)

@urn.command()
@click.argument('document-pid')
@with_appcontext
def register(document_pid):
    """Register an URN to the DNB for a given document pid."""
    # The docstring above is also the help text click shows for the command.
    doc = DocumentRecord.get_record_by_pid(document_pid)
    if doc is None:
        # NOTE(review): assumes get_record_by_pid returns None for an unknown
        # pid -- confirm against DocumentRecord.
        click.secho(f'Document not found: {document_pid}', fg='red')
        return
    # register_document returns a truthy value only when the DNB service
    # accepted the request; report the outcome instead of discarding it.
    if DnbUrnService.register_document(doc):
        click.secho(
            f'URN registered for document: {document_pid}', fg='green')
    else:
        click.secho(
            f'Unable to register the URN for document: {document_pid}',
            fg='red')

@urn.command('urn-for-loaded-records')
@with_appcontext
def urn_for_loaded_records():
"""Generate and register urns for loaded records."""
for idx, document in enumerate(Urn.get_documents_to_generate_urns(), 1):
click.secho(
f'\t{idx}: generate urn code for pid: {document.pid}', fg='green')
f'\t{idx}: generate urn code for pid: {document["pid"]}', fg='green')
Urn.create_urn(document)
document.commit()
db.session.commit()
document.reindex()


@urn.command('register-urn-pids')
Expand Down
68 changes: 49 additions & 19 deletions sonar/modules/documents/dnb.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,33 @@ def verify(cls, urn_code):
f"/urns/urn/{urn_code}",
headers=cls.headers()
)
answer = response.status_code == 200
return response.status_code == 200
except Exception as error:
current_app.logger.error(
'unable to connect to DNB verify service.')
return answer


@classmethod
def get(cls, urn_code):
    """Get the information registered at the DNB for a URN.

    The HTTP status code is not checked: whatever JSON body the DNB
    service answers with is returned to the caller.

    :param urn_code: the urn code.
    :returns: the decoded JSON body of the DNB response, or None if the
        request or the JSON decoding raised an exception.
    """
    # Documentation: https://wiki.dnb.de/display/URNSERVDOK/URN-Service+API
    # https://wiki.dnb.de/display/URNSERVDOK/Beispiele%3A+URN-Verwaltung
    try:
        response = requests.get(
            f"{current_app.config.get('SONAR_APP_URN_DNB_BASE_URL')}"
            f"/urns/urn/{urn_code}",
            headers=cls.headers()
        )
        return response.json()
    except Exception:
        # Best effort: any failure (connection error, invalid JSON, ...)
        # is logged and reported to the caller as None.
        current_app.logger.error(
            'unable to connect to DNB verify service.')
    return None
@classmethod
def register_document(cls, document):
"""Register a new URN code.
Expand All @@ -77,21 +97,20 @@ def register_document(cls, document):
:returns: True if urn is registered, otherwise False.
"""
from sonar.modules.documents.api import DocumentRecord
from sonar.modules.organisations.api import OrganisationRecord
answer = False
sonar_base_url = current_app.config.get('SONAR_APP_BASE_URL')
base_url = current_app.config.get('SONAR_APP_BASE_URL')
if not isinstance(document, DocumentRecord):
document = DocumentRecord(document)
if orgs := document.get('organisation', []):
if orgs := document.replace_refs().get('organisation', []):
org_code = current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')
if org := orgs[0]:
org = OrganisationRecord(org)
org = org.replace_refs()
if org.get('isDedicated') or org.get('isShared'):
org_code = org.get('code')
url = f"{sonar_base_url}/{org_code}/documents/{document.get('pid')}"
if org.get('isDedicated') and (server_name := org.get('serverName')):
base_url = server_name
url = f"{base_url}/{org_code}/documents/{document.get('pid')}"
urn = document.get_rero_urn_code(document)
data = {
'urn': document.get_rero_urn_code(document),
'urn': urn,
'urls': [
{
'url': url,
Expand All @@ -100,18 +119,29 @@ def register_document(cls, document):
]
}
try:
response = requests.request(
'POST',
f"{current_app.config.get('SONAR_APP_URN_DNB_BASE_URL')}/urns",
headers=cls.headers(),
data=json.dumps(data)
)
answer = response.status_code == 201
if cls.verify(urn):
response = requests.request(
'PATCH',
f"{current_app.config.get('SONAR_APP_URN_DNB_BASE_URL')}/urns/urn/{urn}/my-urls",
headers=cls.headers(),
data=json.dumps(data['urls'])
)
return response.status_code == 204
else:
response = requests.request(
'POST',
f"{current_app.config.get('SONAR_APP_URN_DNB_BASE_URL')}/urns",
headers=cls.headers(),
data=json.dumps(data)
)
response_data = response.json()
if response_data.get('urn') != urn:
current_app.logger.error(
f'Bad DNB server response: {response_data}')
return response.status_code == 201
except Exception as error:
current_app.logger.error(
'unable to connect to DNB register service.')
return answer


@classmethod
def register(cls, urn_code):
Expand Down
25 changes: 25 additions & 0 deletions sonar/modules/documents/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

"""DocumentRecord Extensions."""

from flask import current_app
from invenio_db import db
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
from invenio_records.extensions import RecordExtension

Expand Down Expand Up @@ -82,10 +85,32 @@ def pre_create(self, record):
# Generate URN codes for documents without URNs.
if not record.get_rero_urn_code(record):
Urn.create_urn(record)
if record.model:
with db.session.begin_nested():
record.model.data = record
db.session.add(record.model)

def post_create(self, record):
    """Hand the freshly created record over to the URN registration task.

    :param record: the invenio record instance to be processed.
    """
    # presumably a Celery task, so ``delay`` only queues the registration
    # -- confirm where register_urn_code_from_document is defined.
    enqueue_registration = register_urn_code_from_document.delay
    enqueue_registration(record)


def post_delete(self, record, force=False):
    """Delete the URN persistent identifier of a deleted document.

    When the record carries a URN code, the matching ``urn`` persistent
    identifier is deleted and the event is written to the application log.

    :param record: the invenio record instance to be processed.
    :param force: unused.
    """
    from .api import DocumentRecord

    urn_code = DocumentRecord.get_rero_urn_code(record)
    if not urn_code:
        # No URN attached to this document: nothing to do.
        return
    try:
        pid = PersistentIdentifier.get('urn', urn_code)
        pid.delete()
        current_app.logger.error(
            f'Document (pid:{record["pid"]}) has been deleted and '
            f'contains an URN ({urn_code})')
    except PIDDoesNotExistError:
        # The URN code has no persistent identifier: silently ignored.
        pass
19 changes: 13 additions & 6 deletions sonar/modules/documents/urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,13 @@ def create_urn(cls, record):
:param record: the invenio record instance to be processed.
"""
from sonar.modules.documents.api import DocumentRecord
urn_config = current_app.config.get("SONAR_APP_DOCUMENT_URN")
org_pid = record.replace_refs().get("organisation", [{}])[0].get("pid")
if DocumentRecord.get_rero_urn_code(record):
current_app.logger.warning(
f'generated urn already exist for document: {record["pid"]}')
return
if config := urn_config.get("organisations", {}).get(org_pid):
if record.get("documentType") in config.get("types"):
urn_next_pid = str(UrnIdentifier.next())
Expand All @@ -105,10 +110,10 @@ def create_urn(cls, record):
else:
record["identifiedBy"] = \
[{"type": "bf:Urn", "value": urn_code}]
return pid
except PIDAlreadyExists:
current_app.logger.error(
'generated urn already exist for document: '
+ record.get('pid'))
f'generated urn already exist for document: {record["pid"]}')

@classmethod
def urn_query(cls, status=None):
Expand Down Expand Up @@ -187,20 +192,22 @@ def get_documents_to_generate_urns(cls):
:returns: generator of document records.
"""
from elasticsearch_dsl import Q

from sonar.modules.documents.api import DocumentRecord, DocumentSearch
urn_config = current_app.config.get("SONAR_APP_DOCUMENT_URN")
configs = urn_config.get('organisations', {})
pids = []
for org_pid in configs.items():
for org_pid in configs.keys():
config = configs.get(org_pid)
doc_types = config.get('types')
query = DocumentSearch()\
.filter('terms', documentType=doc_types)\
.filter('exists', field='identifiedBy')\
.filter('term', organisation__pid=org_pid)\
.filter('bool', must_not=[
Q('term', identifiedBy__type='bf:Urn')])\
Q('nested', path='identifiedBy', query=Q('term', identifiedBy__type='bf:Urn'))])\
.source(['pid'])
pids.extend(hit.pid for hit in query.scan())

for pid in pids:
for pid in set(pids):
yield DocumentRecord.get_record_by_pid(pid)
41 changes: 0 additions & 41 deletions sonar/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,8 @@
"""Utils functions for application."""

import datetime
import itertools
import re
from time import sleep

import click
from celery import current_app as current_celery
from flask import abort, current_app, g, request
from invenio_i18n.ext import current_i18n
from invenio_mail.api import TemplatedMessage
Expand Down Expand Up @@ -388,40 +384,3 @@ def file_download_ui(pid, record, _record_file_factory=None, **kwargs):
},
as_attachment=('download' in request.args)
)


def queue_count():
    """Return the number of reserved plus active tasks seen by celery."""
    inspector = current_celery.control.inspect()
    total = 0
    # Query the reserved tasks first, then the active ones; each answer is
    # either falsy or a mapping whose values are collections of tasks.
    for state in ('reserved', 'active'):
        tasks_by_key = getattr(inspector, state)()
        if tasks_by_key:
            total += sum(len(tasks) for _, tasks in tasks_by_key.items())
    return total



def wait_empty_tasks(delay, verbose=False):
    """Block until two task counts taken 5 seconds apart are both zero.

    :param delay: seconds to sleep before each new pair of counts.
    :param verbose: if True, print a spinner before each pair of counts.
    """
    frames = itertools.cycle(['-', '\\', '|', '/']) if verbose else None

    def _show_spinner():
        """Print the next spinner frame when running in verbose mode."""
        if verbose:
            click.echo(
                f'Waiting: {next(frames)}\r',
                nl=False
            )

    def _pending():
        """Sum two task counts taken 5 seconds apart."""
        first = queue_count()
        sleep(5)
        return first + queue_count()

    _show_spinner()
    remaining = _pending()
    while remaining:
        _show_spinner()
        sleep(delay)
        remaining = _pending()
4 changes: 3 additions & 1 deletion tests/api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def create_app():
def minimal_thesis_document(db, bucket_location, organisation):
"""Return a minimal thesis document."""
with requests_mock.mock() as response:
response.post(requests_mock.ANY, status_code=201)
response.post(
requests_mock.ANY, status_code=201,
json={'urn': 'urn:nbn:ch:rero-006-17'})
record = DocumentRecord.create(
{
"title": [
Expand Down
4 changes: 2 additions & 2 deletions tests/api/documents/test_documents_permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
def test_list(app, client, make_document, superuser, admin, moderator,
submitter, user):
"""Test list documents permissions."""
make_document(None, with_file=True)
make_document('org', with_file=True)
make_document(organisation=None, with_file=True)
make_document(organisation='org', with_file=True)

# Not logged
res = client.get(url_for('invenio_records_rest.doc_list'))
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_api_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_api_query(client, document_with_file, document_json, make_document,
},
'source': 'RERO'
}]
make_document('org')
make_document(organisation='org')
response = client.get(url_for(
'invenio_records_rest.doc_list',
subject=['Time series models', 'GARCH models']),
Expand Down
18 changes: 10 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def instance_path():
@pytest.fixture(scope='module', autouse=True)
def app_config(app_config):
"""Define configuration for module."""
app_config['RATELIMIT_STORAGE_URL'] = 'memory://'
app_config['SHIBBOLETH_SERVICE_PROVIDER'] = dict(
strict=True,
debug=True,
Expand Down Expand Up @@ -400,7 +401,8 @@ def superuser(make_user):
def document_json(app, db, bucket_location, organisation):
"""JSON document fixture."""
return {
'identifiedBy': [{
'identifiedBy': [
{
'value': 'urn:nbn:ch:rero-006-108713',
'type': 'bf:Urn'
}, {
Expand Down Expand Up @@ -524,18 +526,18 @@ def document_json(app, db, bucket_location, organisation):
def make_document(db, document_json, make_organisation, pdf_file, embargo_date):
"""Factory for creating document."""

def _make_document(organisation='org', with_file=False, pid=None):
def _make_document(data=document_json, organisation='org', with_file=False, pid=None):
if organisation:
make_organisation(organisation)
document_json['organisation'] = [{
data['organisation'] = [{
'$ref': 'https://sonar.ch/api/organisations/org'}]


if pid:
document_json['pid'] = pid
data['pid'] = pid
else:
document_json.pop('pid', None)
document_json.pop('_oai', None)
data.pop('pid', None)
data.pop('_oai', None)

record = DocumentRecord.create(document_json,
dbcommit=True,
Expand Down Expand Up @@ -563,13 +565,13 @@ def _make_document(organisation='org', with_file=False, pid=None):
@pytest.fixture()
def document(make_document):
"""Create a document."""
return make_document('org', False)
return make_document(organisation='org', with_file=False)


@pytest.fixture()
def document_with_file(make_document):
"""Create a document with a file associated."""
return make_document('org', True)
return make_document(organisation='org', with_file=True)


@pytest.fixture()
Expand Down
Loading

0 comments on commit 5c02320

Please sign in to comment.