From ad2241f7f9166c52783b882323d7f82ce14ef23e Mon Sep 17 00:00:00 2001 From: Renaud Michotte Date: Thu, 31 Mar 2022 10:46:17 +0200 Subject: [PATCH] document: subjects subdivision Adds possibility to have subject subdivisions for local subject. Refactoring document subjects management and rendering. Closes rero/rero-ils#2455. Closes rero/rero-ils#2609. Closes rero/rero-ils#1869. Co-Authored-by: Renaud Michotte --- rero_ils/modules/contributions/api.py | 2 +- rero_ils/modules/documents/api.py | 26 +- .../modules/documents/commons/__init__.py | 26 ++ .../modules/documents/commons/subjects.py | 222 ++++++++++++++++++ .../dojson/contrib/marc21tojson/dnb/model.py | 161 +------------ .../dojson/contrib/marc21tojson/kul/model.py | 166 +------------ .../contrib/marc21tojson/loc/__init__.py | 4 +- .../dojson/contrib/marc21tojson/loc/model.py | 78 +++--- .../dojson/contrib/marc21tojson/rero/model.py | 105 ++++----- .../dojson/contrib/marc21tojson/slsp/model.py | 111 ++++----- .../contrib/marc21tojson/ugent/model.py | 61 ++--- .../documents/document_subjects-v0.0.1.json | 176 +++++++++++++- .../v7/documents/document-v0.0.1.json | 22 +- rero_ils/modules/documents/models.py | 11 + rero_ils/modules/documents/serializers.py | 42 ++-- .../rero_ils/detailed_view_documents.html | 14 +- rero_ils/modules/documents/views.py | 29 +-- tests/ui/documents/test_documents_filter.py | 16 +- .../{ => documents}/test_documents_dojson.py | 26 +- .../test_documents_dojson_dc.py | 0 .../test_documents_dojson_ebooks.py | 0 .../test_documents_dojson_marc21.py | 0 .../test_documents_dojson_slsp.py | 0 .../test_documents_dojson_unimarc.py | 0 .../test_documents_jsonschema.py | 0 .../{ => documents}/test_documents_utils.py | 0 tests/unit/documents/test_subjects.py | 128 ++++++++++ 27 files changed, 817 insertions(+), 609 deletions(-) create mode 100644 rero_ils/modules/documents/commons/__init__.py create mode 100644 rero_ils/modules/documents/commons/subjects.py rename tests/unit/{ => 
documents}/test_documents_dojson.py (99%) rename tests/unit/{ => documents}/test_documents_dojson_dc.py (100%) rename tests/unit/{ => documents}/test_documents_dojson_ebooks.py (100%) rename tests/unit/{ => documents}/test_documents_dojson_marc21.py (100%) rename tests/unit/{ => documents}/test_documents_dojson_slsp.py (100%) rename tests/unit/{ => documents}/test_documents_dojson_unimarc.py (100%) rename tests/unit/{ => documents}/test_documents_jsonschema.py (100%) rename tests/unit/{ => documents}/test_documents_utils.py (100%) create mode 100644 tests/unit/documents/test_subjects.py diff --git a/rero_ils/modules/contributions/api.py b/rero_ils/modules/contributions/api.py index e4f7fd86d9..e818c982e8 100644 --- a/rero_ils/modules/contributions/api.py +++ b/rero_ils/modules/contributions/api.py @@ -251,7 +251,7 @@ def get_authorized_access_point(self, language): """Get localized authorized_access_point. :param language: language for authorized access point. - :returns: authorized access point in given lamguage. + :returns: authorized access point in given language. 
""" return self._get_mef_localized_value( key='authorized_access_point', diff --git a/rero_ils/modules/documents/api.py b/rero_ils/modules/documents/api.py index 122b81951d..0caaf8ed7c 100644 --- a/rero_ils/modules/documents/api.py +++ b/rero_ils/modules/documents/api.py @@ -25,7 +25,7 @@ from invenio_circulation.search.api import search_by_pid from jsonschema.exceptions import ValidationError -from .models import DocumentIdentifier, DocumentMetadata +from .models import DocumentIdentifier, DocumentMetadata, DocumentSubjectType from .utils import edition_format_text, publication_statement_text, \ series_statement_format_text, title_format_text_head from ..acq_order_lines.api import AcqOrderLinesSearch @@ -321,24 +321,14 @@ def replace_refs(self): if agent: contribution['agent'] = agent subjects = self.get('subjects', []) - resolved_subjects = [] for subject in subjects: - subject_ref = subject.get('$ref') - subject_type = subject.get('type') - if subject_ref and \ - subject_type in ['bf:Person', 'bf:Organisation']: - subject, _ = Contribution.get_record_by_ref(subject_ref) - if subject: - subject.update({'type': subject_type}) - resolved_subjects.append(subject) - else: - current_app.logger.error( - 'NO SUBJECT CONTRIBUTION REF FOUND:' - f' {self.pid} {subject_ref}') - else: - resolved_subjects.append(subject) - if resolved_subjects: - self['subjects'] = resolved_subjects + ref = subject.get('$ref') + type = subject.get('type') + if ref and type in [DocumentSubjectType.PERSON, + DocumentSubjectType.ORGANISATION]: + data, _ = Contribution.get_record_by_ref(ref) + del subject['$ref'] + subject.update(data) return super().replace_refs() diff --git a/rero_ils/modules/documents/commons/__init__.py b/rero_ils/modules/documents/commons/__init__.py new file mode 100644 index 0000000000..81cbccc9fd --- /dev/null +++ b/rero_ils/modules/documents/commons/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2022 RERO +# Copyright (C) 2022 
UCLouvain +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""Common classes used by RERO documents.""" + +from .subjects import Subject, SubjectFactory + +__all__ = [ + Subject, + SubjectFactory +] diff --git a/rero_ils/modules/documents/commons/subjects.py b/rero_ils/modules/documents/commons/subjects.py new file mode 100644 index 0000000000..8d2f5bccd4 --- /dev/null +++ b/rero_ils/modules/documents/commons/subjects.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2022 RERO +# Copyright (C) 2022 UCLouvain +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""Common classes used for documents. + +Subject classes represent all possible subject data from a document resource. +There are two kinds of subject: local subjects and referenced subjects. + * A local subject is a data structure where all subject metadata are included + in the structure itself. 
+ * A referenced subject is a data structure where a `$ref` key is a link to a + URI where the metadata can be found. + +HOW TO USE: + + >> from rero_ils.modules.documents.commons import Subject, SubjectFactory + >> s = SubjectFactory.create_subject(subject_data) + >> print(s.render(language='fre')) + +As we don't know (we don't want to know) which kind of subject is described in +the `subject` data, we can't use the specialized corresponding class. Instead, +we can use a factory class that builds the correct subject for us. +So we NEVER need to use other classes than `Subject` and `SubjectFactory`. +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field + +from rero_ils.modules.contributions.api import Contribution +from rero_ils.modules.documents.models import DocumentSubjectType + + +# ============================================================================= +# SUBJECT CLASSES +# ============================================================================= +@dataclass +class Subject(ABC): + """Document subject representation.""" + + type: str = field(init=False) + data: dict = field(default_factory=dict) + + def __post_init__(self): + """Post initialization dataclass magic function.""" + if 'type' not in self.data: + raise AttributeError('"type" attribute is required') + self.type = self.data['type'] + + @abstractmethod + def render(self, **kwargs) -> str: + """Render the subject as a string.""" + raise NotImplementedError() + + +@dataclass +class ReferenceSubject(Subject, ABC): + """Document subject related to a reference URI.""" + + reference: str = field(init=False) + + def __post_init__(self): + """Post initialization dataclass magic function.""" + super().__post_init__() + if '$ref' not in self.data: + raise AttributeError('"$ref" attribute is required') + self.reference = self.data['$ref'] + + def render(self, language=None, **kwargs) -> str: + """Render the subject as a string. 
+ + :param language: preferred language for the subject. + :return the string representation of this subject. + """ + sub, _ = Contribution.get_record_by_ref(self.reference) + return sub.get_authorized_access_point(language=language) + + +@dataclass +class LocalSubject(Subject, ABC): + """Local document subject.""" + + part_separator: str = ' - ' + + def _get_subdivision_terms(self) -> list[str]: + """Get subject subdivision terms. + + :return the subdivision terms list. + """ + return self.data.get('genreForm_subdivisions', []) \ + + self.data.get('topic_subdivisions', []) \ + + self.data.get('temporal_subdivisions', []) \ + + self.data.get('place_subdivisions', []) + + @abstractmethod + def get_main_label(self) -> str: + """Get the main label of the subject.""" + raise NotImplementedError() + + def render(self, **kwargs) -> str: + """Render the subject as a string. + + :return the best possible label for this subject. + """ + parts = [self.get_main_label()] + self._get_subdivision_terms() + return LocalSubject.part_separator.join(parts) + + +@dataclass +class TermLocalSubject(LocalSubject): + """Local document subject representing base on `term` field.""" + + def get_main_label(self) -> str: + """Get the main label of the subject.""" + if 'term' not in self.data: + raise AttributeError('"term" doesn\'t exist for this subject') + return self.data['term'] + + +@dataclass +class PreferredNameLocalSubject(LocalSubject): + """Local document subject representing base on `preferred_name` field.""" + + def get_main_label(self) -> str: + """Get the main label of the subject.""" + if 'preferred_name' not in self.data: + msg = '"preferred_name" doesn\'t exist for this subject' + raise AttributeError(msg) + return self.data['preferred_name'] + + +@dataclass +class TitleLocalSubject(LocalSubject): + """Local document subject representing base on `title` field.""" + + def get_main_label(self) -> str: + """Get the main label of the subject.""" + if 'title' not in self.data: + 
msg = '"title" doesn\'t exist for this subject' + raise AttributeError(msg) + parts = [self.data['title']] + if 'creator' in self.data: + parts.append(self.data['creator']) + return ' / '.join(parts) + + +# ============================================================================= +# SUBJECT FACTORIES +# ============================================================================= + +class SubjectFactory: + """Document subject factory.""" + + @staticmethod + def create_subject(data) -> Subject: + """Factory method to create the concrete subject class. + + :param data: the dictionary representing the subject. + :return the created subject. + """ + factory_class = LocalSubjectFactory + if '$ref' in data: + factory_class = ReferenceSubjectFactory + return factory_class()._build_subject(data) + + @abstractmethod + def _build_subject(self, data) -> Subject: + """Build a subject from data. + + :param data: the dictionary representing the subject. + :return the built subject. + """ + raise NotImplementedError + + +class ReferenceSubjectFactory(SubjectFactory): + """Document referenced subject factory.""" + + def _build_subject(self, data) -> ReferenceSubject: + """Build a subject from data. + + :param data: the dictionary representing the subject. + :return the built subject. + """ + return ReferenceSubject(data=data) + + +class LocalSubjectFactory(SubjectFactory): + """Document local subject factory.""" + + mapper = { + DocumentSubjectType.ORGANISATION: PreferredNameLocalSubject, + DocumentSubjectType.PERSON: PreferredNameLocalSubject, + DocumentSubjectType.PLACE: PreferredNameLocalSubject, + DocumentSubjectType.TEMPORAL: TermLocalSubject, + DocumentSubjectType.TOPIC: TermLocalSubject, + DocumentSubjectType.WORK: TitleLocalSubject, + } + + def _build_subject(self, data) -> Subject: + """Build a subject from data. + + :param data: the dictionary representing the subject. + :return the built subject. 
+ """ + subject_type = data.get('type') + if subject_type not in self.mapper.keys(): + raise AttributeError(f'{subject_type} isn\'t a valid subject type') + return self.mapper[subject_type](data=data) diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/dnb/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/dnb/model.py index f908f0a1ad..7333a4a058 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/dnb/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/dnb/model.py @@ -18,14 +18,11 @@ """rero-ils MARC21 model definition.""" -import re - from dojson import utils -from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \ - build_string_from_subfields, get_contribution_link, \ - remove_trailing_punctuation +from rero_ils.dojson.utils import ReroIlsMarc21Overdo +from ..loc import marc21_to_subjects_6XX as marc21_to_subjects_6XX_loc from ..utils import do_abbreviated_title, \ do_acquisition_terms_from_field_037, do_classification, do_contribution, \ do_copyright_date, do_credits, do_dissertation, do_edition_statement, \ @@ -292,158 +289,8 @@ def marc21_to_supplementary_content(self, key, value): @utils.for_each_value @utils.ignore_value def marc21_to_subjects_6XX(self, key, value): - """Get subjects. 
- - - create an object : - genreForm : for the field 655 - subjects : for 6xx with $2 rero - subjects_imported : for 6xx having indicator 2 '0' or '2' - """ - type_per_tag = { - '600': 'bf:Person', - '610': 'bf:Organization', - '611': 'bf:Organization', - '600t': 'bf:Work', - '610t': 'bf:Work', - '611t': 'bf:Work', - '630': 'bf:Work', - '650': 'bf:Topic', # or bf:Temporal, changed by code - '651': 'bf:Place', - '655': 'bf:Topic' - } - - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - - field_data_per_tag = { - '600': 'preferred_name', - '610': 'preferred_name', - '611': 'preferred_name', - '600t': 'title', - '610t': 'title', - '611t': 'title', - '630': 'title', - '650': 'term', - '651': 'preferred_name', - '655': 'term' - } - - subfield_code_per_tag = { - '600': 'abcd', - '610': 'ab', - '611': 'acden', - '600t': 'tpn', - '610t': 'tpn', - '611t': 't', - '630': 'apn', - '650': 'a', - '651': 'a', - '655': 'a' - } - - conference_per_tag = { - '610': False, - '611': True - } - source_per_indicator_2 = { - '0': 'LCSH', - '2': 'MeSH' - } - - indicator_2 = key[4] - tag_key = key[:3] - subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] - subfields_a = utils.force_list(value.get('a', [])) - if subfield_2 in ('rero', 'gnd', 'idref'): - has_dollar_t = value.get('t') - - if tag_key in ('600', '610', '611') and has_dollar_t: - tag_key += 't' - data_type = type_per_tag[tag_key] - - if tag_key == '650': - for subfield_a in subfields_a: - start_with_digit_regexp = re.compile(r'^\d') - match = start_with_digit_regexp.search(subfield_a) - if match: - data_type = 'bf:Temporal' - break - - subject = { - 'type': data_type, - } - - string_build = build_string_from_subfields( - value, - 
subfield_code_per_tag[tag_key]) - if (tag_key == '655'): - # remove the square brackets - string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build) - subject[field_data_per_tag[tag_key]] = string_build - - if tag_key in ('610', '611'): - subject['conference'] = conference_per_tag[tag_key] - - if tag_key in ('600t', '610t', '611t'): - creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611 - subject['creator'] = remove_trailing_punctuation( - build_string_from_subfields( - value, - subfield_code_per_tag[creator_tag_key]), - '.', '.' - ) - field_key = 'subjects' - if tag_key == '655': - field_key = 'genreForm' - - subfields_0 = utils.force_list(value.get('0')) - if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0: - ref = get_contribution_link(marc21.bib_id, marc21.rero_id, - subfields_0[0], key) - if ref: - subject = { - '$ref': ref, - 'type': data_type, - } - if not subject.get('$ref'): - identifier = build_identifier(value) - if identifier: - subject['identifiedBy'] = identifier - - if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]): - subjects = self.get(field_key, []) - subjects.append(subject) - self[field_key] = subjects - elif indicator_2 in ['0', '2']: - term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') - if term_string: - source = source_per_indicator_2[indicator_2] - subject_imported = { - 'type': type_per_tag[tag_key], - 'source': source - } - subject_imported[field_data_per_tag[tag_key]] = term_string - if tag_key in ('610', '611'): - subject_imported['conference'] = conference_per_tag[tag_key] - subjects_imported = self.get('subjects_imported', []) - if subject_imported: - subjects_imported.append(subject_imported) - self['subjects_imported'] = subjects_imported + """Get subjects.""" + return marc21_to_subjects_6XX_loc(self, key, value) @marc21.over('sequence_numbering', '^362..') diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/kul/model.py 
b/rero_ils/modules/documents/dojson/contrib/marc21tojson/kul/model.py index be118d5580..73d482d71f 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/kul/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/kul/model.py @@ -18,14 +18,11 @@ """rero-ils MARC21 model definition.""" -import re - from dojson import utils -from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \ - build_string_from_subfields, get_contribution_link, \ - remove_trailing_punctuation +from rero_ils.dojson.utils import ReroIlsMarc21Overdo +from ..loc import marc21_to_subjects_6XX as marc21_to_subjects_6XX_loc from ..utils import do_abbreviated_title, \ do_acquisition_terms_from_field_037, do_classification, do_contribution, \ do_copyright_date, do_credits, do_dissertation, do_edition_statement, \ @@ -299,163 +296,8 @@ def marc21_to_supplementary_content(self, key, value): @utils.for_each_value @utils.ignore_value def marc21_to_subjects_6XX(self, key, value): - """Get subjects. 
- - - create an object : - genreForm : for the field 655 - subjects : for 6xx with $2 rero - subjects_imported : for 6xx having indicator 2 '0' or '2' - """ - type_per_tag = { - '600': 'bf:Person', - '610': 'bf:Organization', - '611': 'bf:Organization', - '600t': 'bf:Work', - '610t': 'bf:Work', - '611t': 'bf:Work', - '630': 'bf:Work', - '650': 'bf:Topic', # or bf:Temporal, changed by code - '651': 'bf:Place', - '655': 'bf:Topic' - } - - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - - field_data_per_tag = { - '600': 'preferred_name', - '610': 'preferred_name', - '611': 'preferred_name', - '600t': 'title', - '610t': 'title', - '611t': 'title', - '630': 'title', - '650': 'term', - '651': 'preferred_name', - '655': 'term' - } - - subfield_code_per_tag = { - '600': 'abcd', - '610': 'ab', - '611': 'acden', - '600t': 'tpn', - '610t': 'tpn', - '611t': 't', - '630': 'apn', - '650': 'a', - '651': 'a', - '655': 'a' - } - - conference_per_tag = { - '610': False, - '611': True - } - source_per_indicator_2 = { - '0': 'LCSH', - '2': 'MeSH' - } - - indicator_2 = key[4] - tag_key = key[:3] - subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] - subfields_a = utils.force_list(value.get('a', [])) - - if subfield_2 in ('rero', 'gnd', 'idref'): - has_dollar_t = value.get('t') - - if tag_key in ('600', '610', '611') and has_dollar_t: - tag_key += 't' - data_type = type_per_tag[tag_key] - - if tag_key == '650': - for subfield_a in subfields_a: - start_with_digit_regexp = re.compile(r'^\d') - match = start_with_digit_regexp.search(subfield_a) - if match: - data_type = 'bf:Temporal' - break - - subject = { - 'type': data_type, - } - - string_build = build_string_from_subfields( - value, - 
subfield_code_per_tag[tag_key]) - if (tag_key == '655'): - # remove the square brackets - string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build) - subject[field_data_per_tag[tag_key]] = string_build - - if tag_key in ('610', '611'): - subject['conference'] = conference_per_tag[tag_key] - - if tag_key in ('600t', '610t', '611t'): - creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611 - subject['creator'] = remove_trailing_punctuation( - build_string_from_subfields( - value, - subfield_code_per_tag[creator_tag_key]), - '.', '.' - ) - field_key = 'subjects' - if tag_key == '655': - field_key = 'genreForm' - - subfields_0 = utils.force_list(value.get('0')) - if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0: - ref = get_contribution_link(marc21.bib_id, marc21.rero_id, - subfields_0[0], key) - if ref: - subject = { - '$ref': ref, - 'type': data_type, - } - if not subject.get('$ref'): - identifier = build_identifier(value) - if identifier: - subject['identifiedBy'] = identifier - - if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]): - subjects = self.get(field_key, []) - subjects.append(subject) - self[field_key] = subjects - elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']: - term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') - if term_string: - if subfield_2 == 'rerovoc': - source = 'rerovoc' - else: - source = source_per_indicator_2[indicator_2] - subject_imported = { - 'type': type_per_tag[tag_key], - 'source': source - } - subject_imported[field_data_per_tag[tag_key]] = \ - term_string.rstrip('.') - if tag_key in ('610', '611'): - subject_imported['conference'] = conference_per_tag[tag_key] - subjects_imported = self.get('subjects_imported', []) - if subject_imported: - subjects_imported.append(subject_imported) - self['subjects_imported'] = subjects_imported + """Get subjects.""" + return marc21_to_subjects_6XX_loc(self, key, value) @marc21.over('sequence_numbering', 
'^362..') diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/__init__.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/__init__.py index d811c10243..2dd0c8c22a 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/__init__.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/__init__.py @@ -18,6 +18,6 @@ """MARC21 RERO to JSON.""" -from .model import marc21 +from .model import marc21, marc21_to_subjects_6XX -__all__ = ('marc21') +__all__ = ('marc21', 'marc21_to_subjects_6XX') diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py index 462ced11bb..72da66c060 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py @@ -533,6 +533,19 @@ def marc21_to_subjects_6XX(self, key, value): subjects : for 6xx with $2 rero subjects_imported : for 6xx having indicator 2 '0' or '2' """ + + def perform_subdivisions(field): + """Perform subject subdivisions from MARC field.""" + subdivisions = { + 'v': 'genreForm_subdivisions', + 'x': 'topic_subdivisions', + 'y': 'temporal_subdivisions', + 'z': 'place_subdivisions' + } + for code, subdivision in subdivisions.items(): + for subfield_value in utils.force_list(value.get(code, [])): + field.setdefault(subdivision, []).append(subfield_value) + type_per_tag = { '600': 'bf:Person', '610': 'bf:Organization', @@ -546,19 +559,6 @@ def marc21_to_subjects_6XX(self, key, value): '655': 'bf:Topic' } - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - field_data_per_tag = { '600': 'preferred_name', '610': 'preferred_name', @@ -597,23 +597,18 @@ def 
marc21_to_subjects_6XX(self, key, value): indicator_2 = key[4] tag_key = key[:3] subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] + subfield_2 = subfields_2[0] if subfields_2 else None subfields_a = utils.force_list(value.get('a', [])) - if subfield_2 in ('rero', 'gnd', 'idref'): - has_dollar_t = value.get('t') - - if tag_key in ('600', '610', '611') and has_dollar_t: + if subfield_2 in ['rero', 'gnd', 'idref']: + if tag_key in ['600', '610', '611'] and value.get('t'): tag_key += 't' data_type = type_per_tag[tag_key] + # `data_type` is Temporal if tag is 650 and a $a start with digit. if tag_key == '650': for subfield_a in subfields_a: - start_with_digit_regexp = re.compile(r'^\d') - match = start_with_digit_regexp.search(subfield_a) - if match: + if subfield_a[0].isdigit(): data_type = 'bf:Temporal' break @@ -622,28 +617,21 @@ def marc21_to_subjects_6XX(self, key, value): } string_build = build_string_from_subfields( - value, - subfield_code_per_tag[tag_key]) - if (tag_key == '655'): + value, subfield_code_per_tag[tag_key]) + if tag_key == '655': # remove the square brackets string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build) subject[field_data_per_tag[tag_key]] = string_build - if tag_key in ('610', '611'): + if tag_key in ['610', '611']: subject['conference'] = conference_per_tag[tag_key] - if tag_key in ('600t', '610t', '611t'): + if tag_key in ['600t', '610t', '611t']: creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611 subject['creator'] = remove_trailing_punctuation( build_string_from_subfields( - value, - subfield_code_per_tag[creator_tag_key]), - '.', '.' 
- ) - field_key = 'subjects' - if tag_key == '655': - field_key = 'genreForm' - + value, subfield_code_per_tag[creator_tag_key]), '.', '.') + field_key = 'genreForm' if tag_key == '655' else 'subjects' subfields_0 = utils.force_list(value.get('0')) if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0: ref = get_contribution_link(marc21.bib_id, marc21.rero_id, @@ -657,27 +645,23 @@ def marc21_to_subjects_6XX(self, key, value): identifier = build_identifier(value) if identifier: subject['identifiedBy'] = identifier + perform_subdivisions(subject) if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]): subjects = self.get(field_key, []) subjects.append(subject) self[field_key] = subjects - elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']: + elif indicator_2 in ['0', '2']: term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') + value, 'abcdefghijklmnopqrstuw', ' - ') if term_string: - if subfield_2 == 'rerovoc': - source = 'rerovoc' - else: - source = source_per_indicator_2[indicator_2] subject_imported = { 'type': type_per_tag[tag_key], - 'source': source + 'source': source_per_indicator_2[indicator_2], + field_data_per_tag[tag_key]: term_string.rstrip('.') } - subject_imported[field_data_per_tag[tag_key]] = \ - term_string.rstrip('.') - if tag_key in ('610', '611'): + perform_subdivisions(subject_imported) + if tag_key in ['610', '611']: subject_imported['conference'] = conference_per_tag[tag_key] subjects_imported = self.get('subjects_imported', []) if subject_imported: diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/rero/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/rero/model.py index 3c0b3b83cb..6e65b505e0 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/rero/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/rero/model.py @@ -57,11 +57,8 @@ def marc21_to_pid(self, key, value): If 001 starts with 'REROILS:' save as 
pid. """ - pid = None value = value.strip().split(':') - if value[0] == 'REROILS': - pid = value[1] - return pid + return value[1] if value[0] == 'REROILS' else None @marc21.over('language', '^008') @@ -518,6 +515,19 @@ def marc21_to_subjects(self, key, value): subjects : for 6xx with $2 rero subjects_imported : for 6xx having indicator 2 '0' or '2' """ + + def perform_subdivisions(field): + """Perform subject subdivisions from MARC field.""" + subdivisions = { + 'v': 'genreForm_subdivisions', + 'x': 'topic_subdivisions', + 'y': 'temporal_subdivisions', + 'z': 'place_subdivisions' + } + for code, subdivision in subdivisions.items(): + for subfield_value in utils.force_list(value.get(code, [])): + field.setdefault(subdivision, []).append(subfield_value) + type_per_tag = { '600': 'bf:Person', '610': 'bf:Organization', @@ -531,19 +541,6 @@ def marc21_to_subjects(self, key, value): '655': 'bf:Topic' } - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - field_data_per_tag = { '600': 'preferred_name', '610': 'preferred_name', @@ -557,19 +554,6 @@ def marc21_to_subjects(self, key, value): '655': 'term' } - subfield_code_per_tag = { - '600': 'abcd', - '610': 'ab', - '611': 'acden', - '600t': 'tpn', - '610t': 'tpn', - '611t': 't', - '630': 'apn', - '650': 'a', - '651': 'a', - '655': 'a' - } - conference_per_tag = { '610': False, '611': True @@ -582,24 +566,18 @@ def marc21_to_subjects(self, key, value): indicator_2 = key[4] tag_key = key[:3] subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] + subfield_2 = subfields_2[0] if subfields_2 else None subfields_a = utils.force_list(value.get('a', [])) if subfield_2 == 'rero': - has_dollar_t = value.get('t') - - if tag_key in ('600', '610', 
'611') and has_dollar_t: + if tag_key in ['600', '610', '611'] and value.get('t'): tag_key += 't' data_type = type_per_tag[tag_key] - start_with_digit = False + # `data_type` is Temporal if tag is 650 and a $a start with digit. if tag_key == '650': for subfield_a in subfields_a: - start_with_digit_regexp = re.compile(r'^\d') - match = start_with_digit_regexp.search(subfield_a) - if match: + if subfield_a[0].isdigit(): data_type = 'bf:Temporal' break @@ -607,18 +585,29 @@ def marc21_to_subjects(self, key, value): 'type': data_type, } + subfield_code_per_tag = { + '600': 'abcd', + '610': 'ab', + '611': 'acden', + '600t': 'tpn', + '610t': 'tpn', + '611t': 't', + '630': 'apn', + '650': 'a', + '651': 'a', + '655': 'a' + } + string_build = build_string_from_subfields( - value, - subfield_code_per_tag[tag_key]) - if (tag_key == '655'): + value, subfield_code_per_tag[tag_key]) + if tag_key == '655': # remove the square brackets string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build) subject[field_data_per_tag[tag_key]] = string_build - if tag_key in ('610', '611'): + if tag_key in ['610', '611']: subject['conference'] = conference_per_tag[tag_key] - - if tag_key in ('600t', '610t', '611t'): + elif tag_key in ['600t', '610t', '611t']: creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611 subject['creator'] = remove_trailing_punctuation( build_string_from_subfields( @@ -626,10 +615,7 @@ def marc21_to_subjects(self, key, value): subfield_code_per_tag[creator_tag_key]), '.', '.' 
) - field_key = 'subjects' - if tag_key == '655': - field_key = 'genreForm' - + field_key = 'genreForm' if tag_key == '655' else 'subjects' subfields_0 = utils.force_list(value.get('0')) if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0: ref = get_contribution_link(marc21.bib_id, marc21.rero_id, @@ -643,26 +629,27 @@ def marc21_to_subjects(self, key, value): identifier = build_identifier(value) if identifier: subject['identifiedBy'] = identifier + perform_subdivisions(subject) if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]): subjects = self.get(field_key, []) subjects.append(subject) self[field_key] = subjects + elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']: term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') + value, 'abcdefghijklmnopqrstuw', ' - ') if term_string: - if subfield_2 == 'rerovoc': - source = 'rerovoc' - else: - source = source_per_indicator_2[indicator_2] + source = 'rerovoc' if subfield_2 == 'rerovoc' \ + else source_per_indicator_2[indicator_2] subject_imported = { 'type': type_per_tag[tag_key], - 'source': source + 'source': source, + field_data_per_tag[tag_key]: term_string } - subject_imported[field_data_per_tag[tag_key]] = term_string - if tag_key in ('610', '611'): + perform_subdivisions(subject_imported) + + if tag_key in ['610', '611']: subject_imported['conference'] = conference_per_tag[tag_key] subjects_imported = self.get('subjects_imported', []) if subject_imported: diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py index 98a8c2e244..1ebdd0131b 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py @@ -17,7 +17,6 @@ # along with this program. If not, see . 
"""rero-ils MARC21 model definition.""" - import re from dojson import utils @@ -57,11 +56,8 @@ def marc21_to_pid(self, key, value): If 001 starts with 'REROILS:' save as pid. """ - pid = None value = value.strip().split(':') - if value[0] == 'REROILS': - pid = value[1] - return pid + return value[1] if value[0] == 'REROILS' else None @marc21.over('language', '^008') @@ -320,6 +316,19 @@ def marc21_to_subjects_6XX(self, key, value): subjects : for 6xx with $2 rero subjects_imported : for 6xx having indicator 2 '0' or '2' """ + + def perform_subdivisions(field): + """Perform subject subdivisions from MARC field.""" + subdivisions = { + 'v': 'genreForm_subdivisions', + 'x': 'topic_subdivisions', + 'y': 'temporal_subdivisions', + 'z': 'place_subdivisions' + } + for code, subdivision in subdivisions.items(): + for subfield_value in utils.force_list(value.get(code, [])): + field.setdefault(subdivision, []).append(subfield_value) + type_per_tag = { '600': 'bf:Person', '610': 'bf:Organization', @@ -333,19 +342,6 @@ def marc21_to_subjects_6XX(self, key, value): '655': 'bf:Topic' } - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - field_data_per_tag = { '600': 'preferred_name', '610': 'preferred_name', @@ -359,19 +355,6 @@ def marc21_to_subjects_6XX(self, key, value): '655': 'term' } - subfield_code_per_tag = { - '600': 'abcd', - '610': 'ab', - '611': 'acden', - '600t': 'tpn', - '610t': 'tpn', - '611t': 't', - '630': 'apn', - '650': 'a', - '651': 'a', - '655': 'a' - } - conference_per_tag = { '610': False, '611': True @@ -384,23 +367,18 @@ def marc21_to_subjects_6XX(self, key, value): indicator_2 = key[4] tag_key = key[:3] subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] + 
subfield_2 = subfields_2[0] if subfields_2 else None subfields_a = utils.force_list(value.get('a', [])) - if subfield_2 in ('rero', 'gnd', 'idref'): - has_dollar_t = value.get('t') - - if tag_key in ('600', '610', '611') and has_dollar_t: + if subfield_2 == 'rero': + if tag_key in ['600', '610', '611'] and value.get('t'): tag_key += 't' data_type = type_per_tag[tag_key] + # `data_type` is Temporal if tag is 650 and a $a starts with a digit. if tag_key == '650': for subfield_a in subfields_a: - start_with_digit_regexp = re.compile(r'^\d') - match = start_with_digit_regexp.search(subfield_a) - if match: + if subfield_a[0].isdigit(): data_type = 'bf:Temporal' break @@ -408,29 +386,34 @@ def marc21_to_subjects_6XX(self, key, value): 'type': data_type, } + subfield_code_per_tag = { + '600': 'abcd', + '610': 'ab', + '611': 'acden', + '600t': 'tpn', + '610t': 'tpn', + '611t': 't', + '630': 'apn', + '650': 'a', + '651': 'a', + '655': 'a' + } + string_build = build_string_from_subfields( - value, - subfield_code_per_tag[tag_key]) - if (tag_key == '655'): + value, subfield_code_per_tag[tag_key]) + if tag_key == '655': # remove the square brackets string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build) subject[field_data_per_tag[tag_key]] = string_build - if tag_key in ('610', '611'): + if tag_key in ['610', '611']: subject['conference'] = conference_per_tag[tag_key] - - if tag_key in ('600t', '610t', '611t'): + elif tag_key in ['600t', '610t', '611t']: creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611 subject['creator'] = remove_trailing_punctuation( build_string_from_subfields( - value, - subfield_code_per_tag[creator_tag_key]), - '.', '.' 
- ) - field_key = 'subjects' - if tag_key == '655': - field_key = 'genreForm' - + value, subfield_code_per_tag[creator_tag_key]), '.', '.') + field_key = 'genreForm' if tag_key == '655' else 'subjects' subfields_0 = utils.force_list(value.get('0')) if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0: ref = get_contribution_link(marc21.bib_id, marc21.rero_id, @@ -444,23 +427,27 @@ def marc21_to_subjects_6XX(self, key, value): identifier = build_identifier(value) if identifier: subject['identifiedBy'] = identifier + perform_subdivisions(subject) if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]): subjects = self.get(field_key, []) subjects.append(subject) self[field_key] = subjects - elif indicator_2 in ['0', '2']: + + elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']: term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') + value, 'abcdefghijklmnopqrstuw', ' - ') if term_string: - source = source_per_indicator_2[indicator_2] + source = 'rerovoc' if subfield_2 == 'rerovoc' \ + else source_per_indicator_2[indicator_2] subject_imported = { 'type': type_per_tag[tag_key], - 'source': source + 'source': source, + field_data_per_tag[tag_key]: term_string } - subject_imported[field_data_per_tag[tag_key]] = term_string - if tag_key in ('610', '611'): + perform_subdivisions(subject_imported) + + if tag_key in ['610', '611']: subject_imported['conference'] = conference_per_tag[tag_key] subjects_imported = self.get('subjects_imported', []) if subject_imported: diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/ugent/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/ugent/model.py index ac4bebd407..c9ef2be770 100644 --- a/rero_ils/modules/documents/dojson/contrib/marc21tojson/ugent/model.py +++ b/rero_ils/modules/documents/dojson/contrib/marc21tojson/ugent/model.py @@ -301,6 +301,19 @@ def marc21_to_subjects_imported(self, key, value): subjects : for 6xx with $2 rero 
subjects_imported : for 6xx having indicator 2 '0' or '2' """ + + def perform_subdivisions(field): + """Perform subject subdivisions from MARC field.""" + subdivisions = { + 'v': 'genreForm_subdivisions', + 'x': 'topic_subdivisions', + 'y': 'temporal_subdivisions', + 'z': 'place_subdivisions' + } + for code, subdivision in subdivisions.items(): + for subfield_value in utils.force_list(value.get(code, [])): + field.setdefault(subdivision, []).append(subfield_value) + type_per_tag = { '600': 'bf:Person', '610': 'bf:Organization', @@ -314,19 +327,6 @@ def marc21_to_subjects_imported(self, key, value): '655': 'bf:Topic' } - ref_link_per_tag = { - '600': 'IdRef agent', - '610': 'IdRef agent', - '611': 'IdRef agent', - '600t': 'IdRef work', - '610t': 'IdRef work', - '611t': 'IdRef work', - '630': 'IdRef work', - '650': 'RERO RAMEAU concept', - '651': 'Idref place', - '655': 'RERO RAMEAU concept' - } - field_data_per_tag = { '600': 'preferred_name', '610': 'preferred_name', @@ -340,19 +340,6 @@ def marc21_to_subjects_imported(self, key, value): '655': 'term' } - subfield_code_per_tag = { - '600': 'abcd', - '610': 'ab', - '611': 'acden', - '600t': 'tpn', - '610t': 'tpn', - '611t': 't', - '630': 'apn', - '650': 'a', - '651': 'a', - '655': 'a' - } - conference_per_tag = { '610': False, '611': True @@ -365,27 +352,21 @@ def marc21_to_subjects_imported(self, key, value): indicator_2 = key[4] tag_key = key[:3] subfields_2 = utils.force_list(value.get('2')) - subfield_2 = None - if subfields_2: - subfield_2 = subfields_2[0] - subfields_a = utils.force_list(value.get('a', [])) + subfield_2 = subfields_2[0] if subfields_2 else None if subfield_2 == 'lcsh' or indicator_2 in ['0', '2', '7']: term_string = build_string_from_subfields( - value, - 'abcdefghijklmnopqrstuvwxyz', ' - ') + value, 'abcdefghijklmnopqrstuw', ' - ') if term_string: - if subfield_2 == 'lcsh': - source = 'LCSH' - else: - source = source_per_indicator_2[indicator_2] + source = 'LCSH' if subfield_2 == 'lcsh' else 
\ + source_per_indicator_2[indicator_2] subject_imported = { 'type': type_per_tag[tag_key], - 'source': source + 'source': source, + field_data_per_tag[tag_key]: term_string.rstrip('.') } - subject_imported[field_data_per_tag[tag_key]] = \ - term_string.rstrip('.') - if tag_key in ('610', '611'): + perform_subdivisions(subject_imported) + if tag_key in ['610', '611']: subject_imported['conference'] = conference_per_tag[tag_key] subjects_imported = self.get('subjects_imported', []) if subject_imported: diff --git a/rero_ils/modules/documents/jsonschemas/documents/document_subjects-v0.0.1.json b/rero_ils/modules/documents/jsonschemas/documents/document_subjects-v0.0.1.json index cc1acf9776..07eba8b563 100644 --- a/rero_ils/modules/documents/jsonschemas/documents/document_subjects-v0.0.1.json +++ b/rero_ils/modules/documents/jsonschemas/documents/document_subjects-v0.0.1.json @@ -19,7 +19,11 @@ "type", "preferred_name", "identifiedBy", - "source" + "source", + "genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -53,6 +57,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": "#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } }, @@ -68,7 +84,11 @@ "preferred_name", "conference", "identifiedBy", - "source" + "source", + "genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -109,6 +129,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": 
"#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } }, @@ -124,7 +156,11 @@ "title", "creator", "identifiedBy", - "source" + "source", + "genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -168,6 +204,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": "#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } }, @@ -182,7 +230,11 @@ "type", "term", "identifiedBy", - "source" + "source", + "genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -218,6 +270,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": "#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } }, @@ -232,7 +296,11 @@ "type", "preferred_name", "identifiedBy", - "source" + "source", + "genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -266,6 +334,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": "#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } }, @@ -280,7 +360,11 @@ "type", "term", "identifiedBy", - "source" + "source", + 
"genreForm_subdivisions", + "topic_subdivisions", + "temporal_subdivisions", + "place_subdivisions" ], "required": [ "type", @@ -316,6 +400,18 @@ }, "source": { "$ref": "#/definitions/source" + }, + "genreForm_subdivisions": { + "$ref": "#/definitions/genreForm_subdivisions" + }, + "topic_subdivisions": { + "$ref": "#/definitions/topic_subdivisions" + }, + "temporal_subdivisions": { + "$ref": "#/definitions/temporal_subdivisions" + }, + "place_subdivisions": { + "$ref": "#/definitions/place_subdivisions" } } } @@ -354,6 +450,74 @@ }, "hide": true } + }, + "genreForm_subdivisions": { + "title": "Form subdivisions", + "type": "array", + "minItems": 1, + "items": { + "title": "Form subdivision", + "description": "Subject subdivision for a specific kind or genre of material", + "type": "string", + "minLength": 2, + "form": { + "placeholder": "Example: Periodicals" + } + }, + "form": { + "hide": true + } + }, + "topic_subdivisions": { + "title": "Concept subdivisions", + "type": "array", + "minItems": 1, + "items": { + "title": "Concept subdivision", + "description": "Subject subdivision for a concept", + "type": "string", + "minLength": 2, + "form": { + "placeholder": "Example: History" + } + }, + "form": { + "hide": true + } + }, + "temporal_subdivisions": { + "title": "Time-span subdivisions", + "type": "array", + "minItems": 1, + "items": { + "title": "Time-span subdivision", + "description": "Subject subdivision for a period of time", + "type": "string", + "minLength": 2, + "form": { + "placeholder": "Example: 500-1400" + } + }, + "form": { + "hide": true + } + }, + "place_subdivisions": { + "title": "Place subdivisions", + "type": "array", + "minItems": 1, + "items": { + "title": "Place subdivision", + "description": "Subject subdivision for a place", + "type": "string", + "minLength": 2, + "form": { + "placeholder": "Example: Mississippi" + } + }, + "form": { + "hide": true + } } } } diff --git 
a/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json b/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json index 64200c2742..39071f6193 100644 --- a/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json +++ b/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json @@ -715,7 +715,8 @@ "type": "keyword" }, "preferred_name": { - "type": "text" + "type": "text", + "copy_to": "facet_subjects" }, "term": { "type": "text", @@ -740,7 +741,8 @@ } }, "title": { - "type": "text" + "type": "text", + "copy_to": "facet_subjects" }, "creator": { "type": "text" @@ -764,6 +766,22 @@ }, "source": { "type": "keyword" + }, + "genreForm_subdivisions": { + "type": "keyword", + "copy_to": "facet_subjects" + }, + "topic_subdivisions": { + "type": "keyword", + "copy_to": "facet_subjects" + }, + "temporal_subdivisions": { + "type": "keyword", + "copy_to": "facet_subjects" + }, + "place_subdivisions": { + "type": "keyword", + "copy_to": "facet_subjects" } } }, diff --git a/rero_ils/modules/documents/models.py b/rero_ils/modules/documents/models.py index 49c69121f9..fd95759b53 100644 --- a/rero_ils/modules/documents/models.py +++ b/rero_ils/modules/documents/models.py @@ -40,3 +40,14 @@ class DocumentMetadata(db.Model, RecordMetadataBase): """Document record metadata.""" __tablename__ = 'document_metadata' + + +class DocumentSubjectType: + """Document subject type.""" + + ORGANISATION = 'bf:Organisation' + PERSON = 'bf:Person' + PLACE = 'bf:Place' + TEMPORAL = 'bf:Temporal' + TOPIC = 'bf:Topic' + WORK = 'bf:Work' diff --git a/rero_ils/modules/documents/serializers.py b/rero_ils/modules/documents/serializers.py index f8de6d39d4..7cbe071372 100644 --- a/rero_ils/modules/documents/serializers.py +++ b/rero_ils/modules/documents/serializers.py @@ -39,7 +39,7 @@ from ..documents.api import Document from ..documents.utils import title_format_text_head from ..documents.views import create_title_alternate_graphic, \ - 
create_title_responsibilites, create_title_variants, subject_format from ..libraries.api import LibrariesSearch from ..locations.api import LocationsSearch from ..organisations.api import OrganisationsSearch @@ -55,6 +55,16 @@ def preprocess_record(self, pid, record, links_factory=None, **kwargs): """Prepare a record and persistent identifier for serialization.""" rec = record titles = rec.get('title', []) + + # build subjects text for display purpose + # Subject formatting must be done before `replace_refs` otherwise the + # referenced object couldn't be performed + # TODO :: Find a way to get language to use to render subject using + # `Accept-Language` header. + language = None + for subject in record.get('subjects', []): + subject['_text'] = subject_format(subject, language) + # build responsibility data for display purpose responsibility_statement = rec.get('responsibilityStatement', []) responsibilities = \ @@ -85,6 +95,7 @@ def preprocess_record(self, pid, record, links_factory=None, **kwargs): ) if contributions: rec['contribution'] = contributions + return super().preprocess_record( pid=pid, record=rec, links_factory=links_factory, kwargs=kwargs) @@ -114,14 +125,10 @@ def post_process_serialize_search(self, results, pid_fetcher): metadata['ui_title_text_responsibility'] = text_title if viewcode != global_view_code: - items = metadata.get('items', []) - if items: - output = [] - for item in items: - if item.get('organisation')\ - .get('organisation_pid') == view_id: - output.append(item) - record['metadata']['items'] = output + record['metadata']['items'] = [ + item for item in metadata.get('items', []) + if item['organisation'].get('organisation_pid') == view_id + ] # Aggregations process if viewcode == global_view_code: @@ -178,12 +185,10 @@ def _process_library_buckets(cls, org, lib_buckets): :return processed buckets """ lib_processed_buckets = [] - libraries = {} # get the library names for a 
given organisation records = LibrariesSearch()\ .get_libraries_by_organisation_pid(org, ['pid', 'name']) - for record in records: - libraries[record.pid] = record.name + libraries = {record.pid: record.name for record in records} # for all library bucket for a given organisation for lib_bucket in lib_buckets: lib_key = lib_bucket.get('key') @@ -211,10 +216,11 @@ def _process_location_buckets(cls, lib, loc_buckets): :return: processed buckets """ # get the location names for a given library - locations = {} - for es_loc in LocationsSearch().source(['pid', 'name'])\ - .filter('term', library__pid=lib).scan(): - locations[es_loc.pid] = es_loc.name + query = LocationsSearch() \ + .filter('term', library__pid=lib) \ + .source(['pid', 'name']) + locations = {hit.pid: hit.name for hit in query.scan()} + loc_processed_buckets = [] # for all location bucket for a given library for loc_bucket in loc_buckets: @@ -530,9 +536,7 @@ def serialize_search(self, pid_fetcher, search_result, :returns: The objects serialized. """ language = request.args.get('ln', DEFAULT_LANGUAGE) - with_holdings_items = True - if request.args.get('without_items', False): - with_holdings_items = False + with_holdings_items = not request.args.get('without_items', False) sru = search_result['hits'].get('sru', {}) query_es = sru.get('query_es', '') organisation_pids = re.findall( diff --git a/rero_ils/modules/documents/templates/rero_ils/detailed_view_documents.html b/rero_ils/modules/documents/templates/rero_ils/detailed_view_documents.html index da67c57059..3d31b50bd6 100644 --- a/rero_ils/modules/documents/templates/rero_ils/detailed_view_documents.html +++ b/rero_ils/modules/documents/templates/rero_ils/detailed_view_documents.html @@ -199,13 +199,13 @@

{% if record.subjects %} -
- {% for subject in record.subjects %} - - {{ subject | subject_format(current_i18n.language) }} - - {% endfor %} -
+
+ {% for subject in record.subjects %} + + {{ subject | subject_format(current_i18n.language) }} + + {% endfor %} +
{% endif %} diff --git a/rero_ils/modules/documents/views.py b/rero_ils/modules/documents/views.py index 095c9a15a5..ba9e684008 100644 --- a/rero_ils/modules/documents/views.py +++ b/rero_ils/modules/documents/views.py @@ -30,6 +30,7 @@ from invenio_records_ui.signals import record_viewed from .api import Document, DocumentsSearch +from .commons import SubjectFactory from .utils import create_authorized_access_point, \ display_alternate_graphic_first, edition_format_text, get_remote_cover, \ publication_statement_text, series_statement_format_text, \ @@ -718,29 +719,19 @@ def online_holdings(document_pid, viewcode='global'): @blueprint.app_template_filter() -def subject_format(subject, language): +def subject_format(subject_data, language): """Format the subject according to the available keys. - :param subject: the record subject. + :param subject_data: the record subject. :param language: current language on interface. """ - for key in ['$ref', 'term', 'preferred_name', 'title']: - value = subject.get(key) - # key does not exists try the next one - if not value: - continue - # resolve $ref and retrieve the name in the given language - if key == '$ref': - sub, _ = Contribution.get_record_by_ref(value) - return sub._get_mef_localized_value( - 'preferred_name', language) - # add the creator to the title - creator = subject.get('creator') - if key == 'title' and creator: - value = ' / '.join([value, creator]) - # do nothing for term and preferred_name - if value: - return value + try: + return SubjectFactory \ + .create_subject(subject_data) \ + .render(language=language) + except (AttributeError, TypeError) as err: + # print(str(err)) + return 'Subject parsing error !' 
@blueprint.app_template_filter() diff --git a/tests/ui/documents/test_documents_filter.py b/tests/ui/documents/test_documents_filter.py index a4f5d38367..0dc0f76111 100644 --- a/tests/ui/documents/test_documents_filter.py +++ b/tests/ui/documents/test_documents_filter.py @@ -17,13 +17,14 @@ """Document filters tests.""" - from rero_ils.modules.documents.api import Document +from rero_ils.modules.documents.models import DocumentSubjectType from rero_ils.modules.documents.views import cartographic_attributes, \ contribution_format, identified_by, main_title_text, note_general, \ notes_except_general, part_of_format, provision_activity, \ provision_activity_not_publication, provision_activity_original_date, \ - provision_activity_publication, title_variants, work_access_point + provision_activity_publication, subject_format, title_variants, \ + work_access_point def test_note_general(): @@ -518,3 +519,14 @@ def test_main_title_text(): extract = main_title_text(title) assert len(extract) == 1 assert extract[0].get('_text') is not None + + +def test_subject_format(): + """Test subject format filter.""" + data = { + 'term': 'subject topic', + 'type': DocumentSubjectType.TOPIC + } + assert subject_format(data, None) == 'subject topic' + data['type'] = DocumentSubjectType.ORGANISATION + assert subject_format(data, None) == 'Subject parsing error !' 
diff --git a/tests/unit/test_documents_dojson.py b/tests/unit/documents/test_documents_dojson.py similarity index 99% rename from tests/unit/test_documents_dojson.py rename to tests/unit/documents/test_documents_dojson.py index d9ca772493..ef209be47e 100644 --- a/tests/unit/test_documents_dojson.py +++ b/tests/unit/documents/test_documents_dojson.py @@ -4938,6 +4938,12 @@ def test_marc21_to_subjects(mock_get): [Bases de données] + genre1 + genre2 + topic1 + temporal1 + temporal2 + place1 rero @@ -4945,8 +4951,12 @@ def test_marc21_to_subjects(mock_get): marc21json = create_record(marc21xml) data = marc21.do(marc21json) assert data.get('genreForm') == [{ - 'type': 'bf:Topic', - 'term': 'Bases de données', + 'type': 'bf:Topic', + 'term': 'Bases de données', + 'genreForm_subdivisions': ['genre1', 'genre2'], + 'temporal_subdivisions': ['temporal1', 'temporal2'], + 'topic_subdivisions': ['topic1'], + 'place_subdivisions': ['place1'], }] @@ -5114,10 +5124,14 @@ def test_marc21_to_subjects_imported(): marc21json = create_record(marc21xml) data = marc21.do(marc21json) assert data.get('subjects_imported') == [{ - 'type': 'bf:Organization', - 'preferred_name': 'Catholic Church - Relations - Eastern churches', - 'source': 'LCSH', - 'conference': False + 'type': 'bf:Organization', + 'preferred_name': 'Catholic Church', + 'source': 'LCSH', + 'conference': False, + 'topic_subdivisions': [ + 'Relations', + 'Eastern churches' + ] }] diff --git a/tests/unit/test_documents_dojson_dc.py b/tests/unit/documents/test_documents_dojson_dc.py similarity index 100% rename from tests/unit/test_documents_dojson_dc.py rename to tests/unit/documents/test_documents_dojson_dc.py diff --git a/tests/unit/test_documents_dojson_ebooks.py b/tests/unit/documents/test_documents_dojson_ebooks.py similarity index 100% rename from tests/unit/test_documents_dojson_ebooks.py rename to tests/unit/documents/test_documents_dojson_ebooks.py diff --git a/tests/unit/test_documents_dojson_marc21.py 
b/tests/unit/documents/test_documents_dojson_marc21.py similarity index 100% rename from tests/unit/test_documents_dojson_marc21.py rename to tests/unit/documents/test_documents_dojson_marc21.py diff --git a/tests/unit/test_documents_dojson_slsp.py b/tests/unit/documents/test_documents_dojson_slsp.py similarity index 100% rename from tests/unit/test_documents_dojson_slsp.py rename to tests/unit/documents/test_documents_dojson_slsp.py diff --git a/tests/unit/test_documents_dojson_unimarc.py b/tests/unit/documents/test_documents_dojson_unimarc.py similarity index 100% rename from tests/unit/test_documents_dojson_unimarc.py rename to tests/unit/documents/test_documents_dojson_unimarc.py diff --git a/tests/unit/test_documents_jsonschema.py b/tests/unit/documents/test_documents_jsonschema.py similarity index 100% rename from tests/unit/test_documents_jsonschema.py rename to tests/unit/documents/test_documents_jsonschema.py diff --git a/tests/unit/test_documents_utils.py b/tests/unit/documents/test_documents_utils.py similarity index 100% rename from tests/unit/test_documents_utils.py rename to tests/unit/documents/test_documents_utils.py diff --git a/tests/unit/documents/test_subjects.py b/tests/unit/documents/test_subjects.py new file mode 100644 index 0000000000..9584c7f9a2 --- /dev/null +++ b/tests/unit/documents/test_subjects.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2022 RERO +# Copyright (C) 2022 UCLouvain +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Tests subject classes for documents.""" +import mock +import pytest +from utils import mock_response + +from rero_ils.modules.contributions.api import Contribution +from rero_ils.modules.documents.commons import SubjectFactory +from rero_ils.modules.documents.models import DocumentSubjectType +from rero_ils.modules.utils import get_ref_for_pid + + +def test_document_local_subjects(): + """Test local document subjects classes and factory.""" + + # SUCCESS + subjects = [{ + 'data': { + 'conference': False, + 'place_subdivisions': ['New-York', 'United-States'], + 'preferred_name': 'ONU', + 'type': DocumentSubjectType.ORGANISATION + }, + 'result': 'ONU - New-York - United-States' + }, { + 'data': { + 'temporal_subdivisions': ['(1922-1976)'], + 'place_subdivisions': ['Martigny', 'Suisse'], + 'preferred_name': 'Jean Dupont', + 'type': DocumentSubjectType.PERSON + }, + 'result': 'Jean Dupont - (1922-1976) - Martigny - Suisse' + }, { + 'data': { + 'title': 'RERO for dummies', + 'creator': 'RERO & UCLouvain teams', + 'type': DocumentSubjectType.WORK + }, + 'result': 'RERO for dummies / RERO & UCLouvain teams' + }, { + 'data': { + 'term': 'horlogerie', + 'type': DocumentSubjectType.TOPIC + }, + 'result': 'horlogerie' + }] + for subject in subjects: + s = SubjectFactory.create_subject(subject['data']) + assert s.render() == subject['result'] + + # ERRORS + data = { + 'preferred_name': 'Error', + 'type': DocumentSubjectType.TOPIC + } + with pytest.raises(AttributeError) as error: + SubjectFactory.create_subject(data).render() + assert 'term' in str(error.value) + data = { + 'term': 'Error', + 'type': DocumentSubjectType.WORK + } + with pytest.raises(AttributeError) as error: + SubjectFactory.create_subject(data).render() + assert 'title' in str(error.value) + data = { + 'term': 'Error', + 'type': DocumentSubjectType.ORGANISATION + } + with 
pytest.raises(AttributeError) as error: + SubjectFactory.create_subject(data).render() + assert 'preferred_name' in str(error.value) + data = { + 'term': 'Error', + 'type': 'dummy' + } + with pytest.raises(AttributeError): + SubjectFactory.create_subject(data).render() + data = { + 'term': 'No type' + } + with pytest.raises(AttributeError): + SubjectFactory.create_subject(data).render() + + +@mock.patch('requests.get') +def test_document_referenced_subject( + mock_contributions_mef_get, contribution_person_response_data, + contribution_person +): + """Test referenced document subjects.""" + mock_contributions_mef_get.return_value = mock_response( + json_data=contribution_person_response_data) + + # REFERENCED SUBJECTS - SUCCESS + data = { + '$ref': get_ref_for_pid(Contribution, contribution_person.pid), + 'type': DocumentSubjectType.PERSON + } + subject = SubjectFactory.create_subject(data) + assert subject.render(language='ger') == 'Loy, Georg, 1885-19..' + assert subject.render(language='dummy') == 'Loy, Georg, 1885-19..' + assert subject.render() == 'Loy, Georg, 1885-19..' + + # REFERENCED SUBJECTS - ERRORS + data = { + '$dummy_ref': get_ref_for_pid(Contribution, contribution_person.pid), + 'type': DocumentSubjectType.PERSON + } + with pytest.raises(AttributeError): + SubjectFactory.create_subject(data).render()