data model: implements publication statement transformation for BNF

* Implements publication statement transformation for BNF.

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
Signed-off-by: Peter Weber <Peter.Weber@rero.ch>
rero · Sep 17, 2019 · fae3a6c · fae3a6c
1 parent 5876f6a
commit fae3a6c
Show file tree

Hide file tree

Showing 4 changed files with 366 additions and 151 deletions.
diff --git a/rero_ils/dojson/utils.py b/rero_ils/dojson/utils.py
@@ -17,46 +17,46 @@
 
 """Dojson utils."""
 
+import re
+
 import click
 from dojson import Overdo, utils
 
 
-class ReroIlsMarc21Overdo(Overdo):
+def remove_trailing_punctuation(
+        data,
+        punctuation=',',
+        spaced_punctuation=':;/-'):
+    """Remove trailing punctuation from data.
+
+    The punctuation parameter lists the
+    punctuation characters to be removed
+    (preceded by a space or not).
+
+    The spaced_punctuation parameter lists the
+    punctuation characters needing one or more preceding space(s)
+    in order to be removed.
+    """
+    return re.sub(
+        r'([{0}]|\s+[{1}])$'.format(punctuation, spaced_punctuation),
+        '',
+        data.rstrip()).rstrip()
+
+
+class ReroIlsOverdo(Overdo):
     """Specialzed Overdo."""
 
     blob_record = None
-    field_008_data = ''
-    lang_from_008 = None
-    date1_from_008 = None
-    date2_from_008 = None
-    date_type_from_008 = ''
-    langs_from_041_a = []
-    langs_from_041_h = []
-    alternate_graphic = {}
 
     def __init__(self, bases=None, entry_point_group=None):
         """Init."""
-        super(ReroIlsMarc21Overdo, self).__init__(
+        super(ReroIlsOverdo, self).__init__(
             bases=bases, entry_point_group=entry_point_group)
 
     def do(self, blob, ignore_missing=True, exception_handlers=None):
         """Translate blob values and instantiate new model instance."""
         self.blob_record = blob
-        self.field_008_data = ''
-        self.date1_from_008 = None
-        self.date2_from_008 = None
-        self.date_type_from_008 = ''
-        fields_008 = self.get_fields(tag='008')
-        if fields_008:
-            self.field_008_data = self.get_control_field_data(
-                fields_008[0]).replace('\n', '')
-            self.date1_from_008 = self.field_008_data[7:11]
-            self.date2_from_008 = self.field_008_data[11:15]
-            self.date_type_from_008 = self.field_008_data[6]
-        self.init_lang()
-        self.init_country()
-        self.init_alternate_graphic()
-        result = super(ReroIlsMarc21Overdo, self).do(
+        result = super(ReroIlsOverdo, self).do(
             blob,
             ignore_missing=ignore_missing,
             exception_handlers=exception_handlers
@@ -109,6 +109,48 @@ def get_subfields(self, field, code=None):
             raise ValueError('data field expected (tag >= 01x)')
         return subfields
 
+
+class ReroIlsMarc21Overdo(ReroIlsOverdo):
+    """Specialized Overdo for MARC21 records."""
+
+    field_008_data = ''
+    lang_from_008 = None
+    date1_from_008 = None
+    date2_from_008 = None
+    date_type_from_008 = ''
+    langs_from_041_a = []
+    langs_from_041_h = []
+    alternate_graphic = {}
+
+    def __init__(self, bases=None, entry_point_group=None):
+        """Init."""
+        super(ReroIlsMarc21Overdo, self).__init__(
+            bases=bases, entry_point_group=entry_point_group)
+
+    def do(self, blob, ignore_missing=True, exception_handlers=None):
+        """Translate blob values and instantiate new model instance."""
+        self.blob_record = blob
+        self.field_008_data = ''
+        self.date1_from_008 = None
+        self.date2_from_008 = None
+        self.date_type_from_008 = ''
+        fields_008 = self.get_fields(tag='008')
+        if fields_008:
+            self.field_008_data = self.get_control_field_data(
+                fields_008[0]).replace('\n', '')
+            self.date1_from_008 = self.field_008_data[7:11]
+            self.date2_from_008 = self.field_008_data[11:15]
+            self.date_type_from_008 = self.field_008_data[6]
+        self.init_lang()
+        self.init_country()
+        self.init_alternate_graphic()
+        result = super(ReroIlsMarc21Overdo, self).do(
+            blob,
+            ignore_missing=ignore_missing,
+            exception_handlers=exception_handlers
+        )
+        return result
+
     def get_link_data(self, subfields_6_data):
         """Extract link and script data from subfields $6 data."""
         link = None

diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py
@@ -23,7 +23,8 @@
 import requests
 from dojson import utils
 
-from rero_ils.dojson.utils import ReroIlsMarc21Overdo
+from rero_ils.dojson.utils import ReroIlsMarc21Overdo, \
+    remove_trailing_punctuation
 
 marc21tojson = ReroIlsMarc21Overdo()
 
@@ -36,26 +37,6 @@ def list_of_langs(data):
     return lang_codes
 
 
-def remove_trailing_punctuation(
-        data,
-        punctuation=',',
-        spaced_punctuation=':;/-'):
-    """Remove trailing punctuation from data.
-
-    The punctuation parameter list the
-    punctuation characters to be removed
-    (preceded by a space or not).
-
-    The spaced_punctuation parameter list the
-    punctuation characters needing one or more preceding space(s)
-    in order to be removed.
-    """
-    return re.sub(
-        r'([{0}]|\s+[{1}])$'.format(punctuation, spaced_punctuation),
-        '',
-        data.rstrip()).rstrip()
-
-
 def get_mef_person_link(id, key, value):
     """Get mef person link."""
     # https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677

diff --git a/rero_ils/modules/documents/dojson/contrib/unimarctojson/model.py b/rero_ils/modules/documents/dojson/contrib/unimarctojson/model.py
@@ -17,29 +17,18 @@
 
 """rero-ils UNIMARC model definition."""
 
+
 import re
 from json import loads
 
-from dojson import Overdo, utils
-# from dojson.utils import force_list
+from dojson import utils
+from dojson.utils import GroupableOrderedDict, force_list
 from pkg_resources import resource_string
 
-unimarctojson = Overdo()
+from rero_ils.dojson.utils import ReroIlsOverdo, remove_trailing_punctuation
 
+unimarctojson = ReroIlsOverdo()
 
-# @unimarctojson.over('__order__', '__order__')
-# def order(self, key, value):
-#     """Preserve order of datafields."""
-#     order = []
-#     for field in value:
-#         name = unimarctojson.index.query(field)
-#         if name:
-#             name = name[0]
-#         else:
-#             name = field
-#         order.append(name)
-#
-#     return order
 
 @unimarctojson.over('type', 'leader')
 def unimarctype(self, key, value):
@@ -180,50 +169,107 @@ def unimarctoauthor(self, key, value):
     return author
 
 
-@unimarctojson.over('publishers', '^210..')
+@unimarctojson.over('provisionActivity', '^21[04]..')
+@utils.for_each_value
 @utils.ignore_value
-def unimarcpublishers_publicationDate(self, key, value):
-    """Get publisher.
-
-    publisher.name: 210 [$b repetitive]
-    publisher.place: 210 [$a repetitive]
-    publicationDate: 210 [$c repetitive] (take only the first one)
-    """
-    lasttag = '?'
-    publishers = self.get('publishers', [])
-
-    publisher = {}
-    indexes = {}
-    lasttag = '?'
-    for tag in value['__order__']:
-        index = indexes.get(tag, 0)
-        data = value[tag]
-        if type(data) == tuple:
-            data = data[index]
-        if tag == 'a' and index > 0 and lasttag != 'a':
-            publishers.append(publisher)
-            publisher = {}
-        if tag == 'a':
-            place = publisher.get('place', [])
-            place.append(data)
-            publisher['place'] = place
-        elif tag == 'c':
-            name = publisher.get('name', [])
-            name.append(data)
-            publisher['name'] = name
-        elif tag == 'd' and index == 0:
-
-            # 4 digits
-            date = re.match(r'.*?(\d{4})', data).group(1)
-            self['publicationYear'] = int(date)
-
-            # create free form if different
-            if data != str(self['publicationYear']):
-                self['freeFormedPublicationDate'] = data
-        indexes[tag] = index + 1
-        lasttag = tag
-    publishers.append(publisher)
-    return publishers
+def unimarcpublishers_provision_activity_publication(self, key, value):
+    """Get provision activity (type, statement, dates) or copyright date."""
+    def build_place_or_agent_data(code, label, index, add_country):
+        type_per_code = {
+            'a': 'bf:Place',
+            'c': 'bf:Agent'
+        }
+        place_or_agent_data = {
+            'type': type_per_code[code],
+            'label': [{'value': remove_trailing_punctuation(label)}]
+        }
+        if add_country:
+            # country from 102
+            field_102 = unimarctojson.get_fields(tag='102')
+            if field_102:
+                field_102 = field_102[0]
+                country_codes = unimarctojson.get_subfields(field_102, 'a')
+                if country_codes:
+                    place_or_agent_data['country'] = country_codes[0].lower()
+        return place_or_agent_data
+
+    publication = {}
+    ind2 = key[4]
+    type_per_ind2 = {
+        ' ': 'bf:Publication',
+        '_': 'bf:Publication',
+        '0': 'bf:Publication',
+        '1': 'bf:Production',
+        '2': 'bf:Distribution',
+        '3': 'bf:Manufacture'
+    }
+    if ind2 == '4':
+        field_d = value.get('d')
+        if field_d:
+            field_d = force_list(field_d)[0]
+            copyrightDate = self.get('copyrightDate', [])
+            if field_d[0] == 'P':
+                copyrightDate.append('℗ ' + field_d[2:])
+            else:
+                copyrightDate.append('© ' + field_d)
+            self['copyrightDate'] = copyrightDate
+    else:
+        publication = {
+            'type': type_per_ind2[ind2],
+            'statement': [],
+        }
+        subfields_d = utils.force_list(value.get('d'))
+        if subfields_d:
+            subfield_d = subfields_d[0]
+            publication['date'] = subfield_d
+
+        if ind2 in (' ', '_', '1'):
+            # startDate: 100, pos. 9-12 endDate: 100, pos. 13-16
+            field_100 = unimarctojson.get_fields(tag='100')
+            if field_100:
+                field_100 = field_100[0]
+                data = unimarctojson.get_subfields(field_100, 'a')
+                if data:
+                    try:
+                        publication['startDate'] = str(int(data[0][9:13]))
+                    except Exception:
+                        pass
+                    try:
+                        publication['endDate'] = str(int(data[0][13:17]))
+                    except Exception:
+                        pass
+        elif ind2 in ('0', '2', '3'):
+            if subfields_d:
+                dates = subfield_d.split('-')
+                if dates[0]:
+                    match = re.search(r'(^\d{4}$)', dates[0])
+                    if match:
+                        publication['startDate'] = dates[0]
+                        if len(dates) >= 2:
+                            match = re.search(r'(^\d{4}$)', dates[1])
+                            if match:
+                                publication['endDate'] = dates[1]
+
+        statement = []
+        if isinstance(value, GroupableOrderedDict):
+            items = value.iteritems(repeated=True)
+        else:
+            items = utils.iteritems(value)
+
+        index = 1
+        add_country = ind2 in (' ', '_', '1')
+        for blob_key, blob_value in items:
+            if blob_key in ('a', 'c'):
+                place_or_agent_data = build_place_or_agent_data(
+                    blob_key, blob_value, index, add_country)
+                if blob_key == 'a':
+                    add_country = False
+                statement.append(place_or_agent_data)
+            if blob_key != '__order__':
+                index += 1
+
+        publication['statement'] = statement
+    return publication or None
 
 
 @unimarctojson.over('formats', '^215..')