Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attachment work components #123

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bluebell/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.1.0"
__version__ = "3.2.0"
34 changes: 32 additions & 2 deletions bluebell/xml.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import re
from itertools import groupby, chain
from collections import defaultdict
from itertools import groupby

from cobalt.akn import get_maker, StructuredDocument
import lxml.etree as etree
from cobalt.akn import get_maker, StructuredDocument, FrbrUri


class IdGenerator:
Expand Down Expand Up @@ -404,6 +405,7 @@ def post_process(self, xml):
xml = self.resolve_displaced_content(xml)
xml = self.normalise(xml)
xml = self.generate_eids(xml)
xml = self.rewrite_all_attachment_work_components(xml)
xml = self.set_attachment_titles(xml)
return xml

Expand Down Expand Up @@ -453,6 +455,34 @@ def get_displaced_content(start, name, marker):

return xml

def rewrite_all_attachment_work_components(self, xml):
    """ Renumber the work component of every attachment so that each one is unique and accurate.

    Renaming one attachment can change the work components of other attachments too. Because attachments
    can be edited independently of each other, postprocessing might be the only chance to bring the
    others back in line.

    Each attachment doc gets a work component of the form ``<prefix><name>_<n>``, where ``<prefix>`` is
    the work component of the enclosing attachment followed by ``/`` (empty when there is none),
    ``<name>`` is the doc's ``name`` attribute and ``<n>`` counts, from 1, the docs seen so far with the
    same prefix and name. The FRBRthis values at the work, expression and manifestation levels are
    rewritten in place, and the same tree is returned.
    """
    namespaces = {'a': xml.nsmap[None]}
    # FRBR level -> name of the FrbrUri method that serialises the URI for that level
    uri_builders = {
        'a:FRBRWork': 'work_uri',
        'a:FRBRExpression': 'expression_uri',
        'a:FRBRManifestation': 'manifestation_uri',
    }
    # how many docs have been seen so far for each "prefix + name" combination
    seen = defaultdict(int)

    for doc in xml.xpath('//a:attachment/a:doc', namespaces=namespaces):
        # FRBRthis elements of the docs belonging to attachments that enclose this one;
        # the last match is the one used as the parent (presumably the nearest enclosing attachment)
        enclosing = doc.getparent().xpath(
            'ancestor::a:attachment/a:doc/a:meta/a:identification/a:FRBRWork/a:FRBRthis',
            namespaces=namespaces,
        )
        if enclosing:
            prefix = FrbrUri.parse(enclosing[-1].attrib['value']).work_component + '/'
        else:
            prefix = ''

        # e.g. schedule_1/schedule, schedule_2/appendix, etc.
        base = f"{prefix}{doc.attrib['name']}"
        seen[base] += 1
        # e.g. schedule_1/schedule_3, schedule_2/appendix_1, etc.
        component = f'{base}_{seen[base]}'

        for level, builder in uri_builders.items():
            for this in doc.xpath(f'./a:meta/a:identification/{level}/a:FRBRthis', namespaces=namespaces):
                uri = FrbrUri.parse(this.attrib['value'])
                uri.work_component = component
                this.attrib['value'] = getattr(uri, builder)()

    return xml

def set_attachment_titles(self, xml):
""" Derive attachment aliases from their headings, if available.
"""
Expand Down
117 changes: 117 additions & 0 deletions tests/rewrite_work_components/basic_in.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
<act contains="singleVersion" name="act">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/za/act/2021/6/!main"/>
<FRBRuri value="/akn/za/act/2021/6"/>
<FRBRalias name="title" value="Work with uncommenced provisions, only one commencement so far"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="za"/>
<FRBRnumber value="6"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/za/act/2021/6/sot@2022-08-12/!main"/>
<FRBRuri value="/akn/za/act/2021/6/sot@2022-08-12"/>
<FRBRdate date="2022-08-12" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="sot"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/za/act/2021/6/sot@2022-08-12/!main"/>
<FRBRuri value="/akn/za/act/2021/6/sot@2022-08-12"/>
<FRBRdate date="2022-02-03" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
<publication date="2021-10-11" name="" number="" showAs=""/>
<lifecycle source="#Laws-Africa">
<eventRef date="2021-10-20" eId="amendment-2021-10-20" source="#amendment-0-source" type="amendment"/>
</lifecycle>
<references source="#cobalt">
<TLCOrganization eId="Laws-Africa" href="http://localhost:8000" showAs="Laws.Africa"/>
<TLCOrganization eId="cobalt" href="https://github.com/laws-africa/cobalt" showAs="cobalt"/>
<passiveRef eId="amendment-0-source" href="/akn/za/act/2021/6" showAs="Work with uncommenced provisions, only one commencement so far"/>
</references>
</meta>
<body>
<chapter eId="chp_1">
<num>1</num>
<heading>Heading</heading>
</chapter>
</body>
<attachments>
<attachment eId="att_1">
<heading>a heading</heading>
<subheading>subheading</subheading>
<doc name="annexure">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/xx-playground/act/2021/1/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1"/>
<FRBRalias name="title" value="Untitled"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="xx-playground"/>
<FRBRnumber value="1"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="eng"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
</meta>
<mainBody>
<p eId="att_1__p_1">some text</p>
</mainBody>
</doc>
</attachment>
<attachment eId="att_1">
<heading>a heading</heading>
<subheading>subheading</subheading>
<doc name="annexure">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/xx-playground/act/2021/1/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1"/>
<FRBRalias name="title" value="Untitled"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="xx-playground"/>
<FRBRnumber value="1"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="eng"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
</meta>
<mainBody>
<p eId="att_1__p_1">some text</p>
</mainBody>
</doc>
</attachment>
</attachments>
</act>
</akomaNtoso>
117 changes: 117 additions & 0 deletions tests/rewrite_work_components/basic_out.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
<act contains="singleVersion" name="act">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/za/act/2021/6/!main"/>
<FRBRuri value="/akn/za/act/2021/6"/>
<FRBRalias name="title" value="Work with uncommenced provisions, only one commencement so far"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="za"/>
<FRBRnumber value="6"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/za/act/2021/6/sot@2022-08-12/!main"/>
<FRBRuri value="/akn/za/act/2021/6/sot@2022-08-12"/>
<FRBRdate date="2022-08-12" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="sot"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/za/act/2021/6/sot@2022-08-12/!main"/>
<FRBRuri value="/akn/za/act/2021/6/sot@2022-08-12"/>
<FRBRdate date="2022-02-03" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
<publication date="2021-10-11" name="" number="" showAs=""/>
<lifecycle source="#Laws-Africa">
<eventRef date="2021-10-20" eId="amendment-2021-10-20" source="#amendment-0-source" type="amendment"/>
</lifecycle>
<references source="#cobalt">
<TLCOrganization eId="Laws-Africa" href="http://localhost:8000" showAs="Laws.Africa"/>
<TLCOrganization eId="cobalt" href="https://github.com/laws-africa/cobalt" showAs="cobalt"/>
<passiveRef eId="amendment-0-source" href="/akn/za/act/2021/6" showAs="Work with uncommenced provisions, only one commencement so far"/>
</references>
</meta>
<body>
<chapter eId="chp_1">
<num>1</num>
<heading>Heading</heading>
</chapter>
</body>
<attachments>
<attachment eId="att_1">
<heading>a heading</heading>
<subheading>subheading</subheading>
<doc name="annexure">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/xx-playground/act/2021/1/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1"/>
<FRBRalias name="title" value="Untitled"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="xx-playground"/>
<FRBRnumber value="1"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="eng"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_1"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
</meta>
<mainBody>
<p eId="att_1__p_1">some text</p>
</mainBody>
</doc>
</attachment>
<attachment eId="att_1">
<heading>a heading</heading>
<subheading>subheading</subheading>
<doc name="annexure">
<meta>
<identification source="#cobalt">
<FRBRWork>
<FRBRthis value="/akn/xx-playground/act/2021/1/!annexure_2"/>
<FRBRuri value="/akn/xx-playground/act/2021/1"/>
<FRBRalias name="title" value="Untitled"/>
<FRBRdate date="2021" name="Generation"/>
<FRBRauthor href=""/>
<FRBRcountry value="xx-playground"/>
<FRBRnumber value="1"/>
</FRBRWork>
<FRBRExpression>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_2"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
<FRBRlanguage language="eng"/>
</FRBRExpression>
<FRBRManifestation>
<FRBRthis value="/akn/xx-playground/act/2021/1/eng@2022-02-07/!annexure_2"/>
<FRBRuri value="/akn/xx-playground/act/2021/1/eng@2022-02-07"/>
<FRBRdate date="2022-02-08" name="Generation"/>
<FRBRauthor href=""/>
</FRBRManifestation>
</identification>
</meta>
<mainBody>
<p eId="att_1__p_1">some text</p>
</mainBody>
</doc>
</attachment>
</attachments>
</act>
</akomaNtoso>
26 changes: 26 additions & 0 deletions tests/test_work_components.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
from unittest import TestCase

from lxml import etree

from tests.support import ParserSupport


class AttachmentWorkComponentsTestCase(ParserSupport, TestCase):
    """ Fixture-driven tests for rewriting the work components of attachments.

    Fixtures live in the ``rewrite_work_components`` directory next to this file.
    """
    # show the complete diff when the XML comparison fails
    maxDiff = None

    def rewrite_and_compare(self, xml_in, xml_out):
        """ Rewrite the work components of one fixture and compare the result with another.

        :param xml_in: base name (without ``.xml``) of the input fixture
        :param xml_out: base name (without ``.xml``) of the fixture holding the expected output
        """
        fixture_dir = os.path.join(os.path.dirname(__file__), 'rewrite_work_components')
        # read fixtures with an explicit encoding so the result doesn't depend on the platform default
        with open(os.path.join(fixture_dir, f'{xml_in}.xml'), 'rt', encoding='utf-8') as f:
            old_xml = f.read()
        with open(os.path.join(fixture_dir, f'{xml_out}.xml'), 'rt', encoding='utf-8') as f:
            expected = f.read()

        xml = etree.fromstring(old_xml)
        # the return value is not needed here; the serialised input tree is what gets compared
        # NOTE(review): self.generator and self.tostring are presumably provided by ParserSupport -- confirm
        self.generator.rewrite_all_attachment_work_components(xml)
        actual = self.tostring(xml)

        self.assertEqual(expected, actual)

    def test_fix_work_components_basic(self):
        """ Two sibling attachments that share a work component end up numbered _1 and _2. """
        self.rewrite_and_compare('basic_in', 'basic_out')
Loading