Skip to content

Commit

Permalink
resource: improve data validation against JSONSchema
Browse files Browse the repository at this point in the history
* Adds a better 'email' format validation for all RERO-ILS resources.
* Simplify JSON schema defining fields with 'email' format.
* Adds length constraints for some `User` resource fields.
* Closes rero#2765.
* Closes rero#2838.

Co-Authored-by: Renaud Michotte <renaud.michotte@gmail.com>
  • Loading branch information
zannkukai committed May 20, 2022
1 parent 059eafa commit 6c87df9
Show file tree
Hide file tree
Showing 15 changed files with 165 additions and 147 deletions.
3 changes: 2 additions & 1 deletion rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2686,7 +2686,7 @@ def _(x):
}
}

RECORDS_JSON_SCHEMA = {
RERO_ILS_DEFAULT_JSON_SCHEMA = {
'acac': '/acq_accounts/acq_account-v0.0.1.json',
'acol': '/acq_order_lines/acq_order_line-v0.0.1.json',
'acor': '/acq_orders/acq_order-v0.0.1.json',
Expand Down Expand Up @@ -2715,6 +2715,7 @@ def _(x):
'tmpl': '/templates/template-v0.0.1.json',
'oplg': '/operation_logs/operation_log-v0.0.1.json',
'vndr': '/vendors/vendor-v0.0.1.json',
'user': '/users/user-v0.0.1.json'
}

# Operation Log Configuration
Expand Down
51 changes: 32 additions & 19 deletions rero_ils/modules/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""API for manipulating records."""

import re
from copy import deepcopy
from uuid import uuid4

Expand All @@ -39,13 +39,28 @@
from invenio_records_rest.utils import obj_or_import_string
from invenio_search import current_search
from invenio_search.api import RecordsSearch
from jsonschema import FormatChecker
from jsonschema.compat import str_types
from jsonschema.exceptions import ValidationError
from kombu.compat import Consumer
from sqlalchemy import text
from sqlalchemy.orm.exc import NoResultFound

from .utils import extracted_data_from_ref

"""Custom ILS record JSON schema format validator."""
ils_record_format_checker = FormatChecker()


@ils_record_format_checker.checks('email')
@ils_record_format_checker.checks('idn-email')
def _strong_email_validation(instance) -> bool:
"""Allow to validate an email address (only email format, not DNS)."""
if not isinstance(instance, str_types):
return False
email_regexp = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
return bool(re.fullmatch(email_regexp, instance))


class IlsRecordError:
"""Base class for errors in the IlsRecordClass."""
Expand Down Expand Up @@ -111,6 +126,8 @@ class IlsRecord(Record):
pids_exist_check = None
pid_check = True

format_checker = ils_record_format_checker

@classmethod
def get_indexer_class(cls):
"""Get the indexer from config."""
Expand All @@ -134,6 +151,7 @@ def _validate(self, **kwargs):
if self.get('_draft'):
# No validation is needed for draft records
return self

json = super()._validate(**kwargs)
validation_message = self.extended_validation(**kwargs)
# We only like to run pids_exist_check if validation_message is True
Expand Down Expand Up @@ -168,24 +186,22 @@ def create(cls, data, id_=None, delete_pid=False,
assert cls.provider
if '$schema' not in data:
pid_type = cls.provider.pid_type
schemas = current_app.config.get('RECORDS_JSON_SCHEMA')
schemas = current_app.config.get('RERO_ILS_DEFAULT_JSON_SCHEMA')
if pid_type in schemas:
from .utils import get_schema_for_resource
data['$schema'] = get_schema_for_resource(pid_type)
pid = data.get('pid')
if delete_pid and pid:
del data['pid']
else:
if pid:
test_rec = cls.get_record_by_pid(pid)
if test_rec is not None:
raise IlsRecordError.PidAlreadyUsed(
'PidAlreadyUsed {pid_type} {pid} {uuid}'.format(
pid_type=cls.provider.pid_type,
pid=test_rec.pid,
uuid=test_rec.id
)
elif pid:
if test_rec := cls.get_record_by_pid(pid):
raise IlsRecordError.PidAlreadyUsed(
'PidAlreadyUsed {pid_type} {pid} {uuid}'.format(
pid_type=cls.provider.pid_type,
pid=test_rec.pid,
uuid=test_rec.id
)
)
if not id_:
id_ = uuid4()
persistent_identifier = cls.minter(id_, data)
Expand Down Expand Up @@ -215,11 +231,10 @@ def get_record_by_pid(cls, pid, with_deleted=False, verbose=False):
cls.provider.pid_type,
pid
)
record = super().get_record(
return super().get_record(
persistent_identifier.object_uuid,
with_deleted=with_deleted
)
return record
# TODO: is it better to raise a error or to return None?
except (NoResultFound, PIDDoesNotExistError):
return None
Expand Down Expand Up @@ -391,7 +406,7 @@ def update(self, data, commit=False, dbcommit=False, reindex=False):
schema = data.get('$schema')
if not schema:
pid_type = self.provider.pid_type
schemas = current_app.config.get('RECORDS_JSON_SCHEMA')
schemas = current_app.config.get('RERO_ILS_DEFAULT_JSON_SCHEMA')
if pid_type in schemas:
from .utils import get_schema_for_resource
data['$schema'] = get_schema_for_resource(pid_type)
Expand Down Expand Up @@ -516,16 +531,14 @@ class IlsRecordsIndexer(RecordIndexer):

def index(self, record):
"""Indexing a record."""
return_value = super().index(record, arguments=dict(refresh='true'))
return return_value
return super().index(record, arguments=dict(refresh='true'))

def delete(self, record):
"""Delete a record.
:param record: Record instance.
"""
return_value = super().delete(record, refresh='true')
return return_value
return super().delete(record, refresh='true')

def bulk_index(self, record_id_iterator, doc_type=None, index=None):
"""Bulk index records.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@
"title": "E-mail",
"description": "The notifications for patrons without e-mail are sent to this address. No notification is sent if the field is empty.",
"type": "string",
"format": "email",
"pattern": "^.*@.*\\..+$",
"minLength": 6
"format": "email"
}
}
},
Expand All @@ -60,9 +58,7 @@
"title": "E-mail",
"description": "The notifications for patrons without e-mail are sent to this address. No notification is sent if the field is empty.",
"type": "string",
"format": "email",
"pattern": "^.*@.*\\..+$",
"minLength": 6
"format": "email"
},
"delay": {
"title": "Delay in minutes",
Expand Down Expand Up @@ -153,9 +149,7 @@
"title": "Email",
"description": "Email of the library.",
"type": "string",
"format": "email",
"pattern": "^.*?@.*.*$",
"minLength": 3
"format": "email"
},
"opening_hours": {
"title": "Opening Hours",
Expand Down Expand Up @@ -431,9 +425,7 @@
"email": {
"title": "Contact email",
"type": "string",
"format": "email",
"pattern": "^.*?@.*.*$",
"minLength": 6
"format": "email"
},
"phone": {
"title": "Contact phone",
Expand Down Expand Up @@ -468,9 +460,7 @@
"email": {
"title": "Contact email",
"type": "string",
"format": "email",
"pattern": "^.*?@.*.*$",
"minLength": 6
"format": "email"
},
"phone": {
"title": "Contact phone",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@
"description": "Specify a generic email address used for notification.",
"type": "string",
"format": "email",
"pattern": "^\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,})+$",
"minLength": 6,
"form": {
"hideExpression": "model.allow_request === false || model.send_notification === false",
"expressionProperties": {
Expand All @@ -152,11 +150,6 @@
"addonLeft": {
"class": "fa fa-at"
}
},
"validation": {
"messages": {
"patternMessage": "The email is not valid."
}
}
}
},
Expand Down
11 changes: 5 additions & 6 deletions rero_ils/modules/patrons/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ def _validate_emails(self):
is email.
"""
patron = self.get('patron')
user = self._get_user_by_user_id(self.get('user_id'))
if patron and patron.get('communication_channel') == \
CommunicationChannel.EMAIL and user.email is None\
CommunicationChannel.EMAIL \
and self.user.email is None \
and patron.get('additional_communication_email') is None:
raise ValidationError('At least one email should be defined '
'for an email communication channel.')
Expand Down Expand Up @@ -246,12 +246,11 @@ def removeUserData(cls, data):

@classmethod
def _get_user_by_user_id(cls, user_id):
"""Get the user using a dict representing a patron.
"""Get the user by its ID.
Try to get an existing user by: user_id, email, username.
:param cls: Class itself.
:param data: dict representing a patron.
:return: a patron object or None.
:param user_id: the user ID
:return: a User object or None.
"""
return _datastore.find_user(id=user_id)

Expand Down
2 changes: 1 addition & 1 deletion rero_ils/modules/patrons/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ def pre_dump(self, record, data, dumper=None):
:return the future dumped data.
"""
user = User.get_by_id(record.get('user_id'))
user_info = user.dumpsMetadata()
user_info = user.dumps_metadata()
return data.update(user_info)
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,8 @@
"title": "Additional communication email",
"type": "string",
"format": "email",
"pattern": "^.*@.*\\..+$",
"minLength": 6,
"form": {
"hideExpression": "field.parent.model && field.parent.model.communication_channel !== 'email'",
"validation": {
"messages": {
"patternMessage": "The email is not valid."
}
}
"hideExpression": "field.parent.model && field.parent.model.communication_channel !== 'email'"
}
},
"communication_language": {
Expand Down
2 changes: 1 addition & 1 deletion rero_ils/modules/patrons/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def logged_user():
if not current_user.is_authenticated:
return jsonify(data)

user = User.get_by_id(current_user.id).dumpsMetadata()
user = User.get_by_id(current_user.id).dumps_metadata()
user['id'] = current_user.id
data = {**data, **user, 'patrons': []}
for patron in Patron.get_patrons_by_user(current_user):
Expand Down
Loading

0 comments on commit 6c87df9

Please sign in to comment.