Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions document_merge_service/api/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,58 @@ class DefaultConfig(AppConfig):
def ready(self):
    """Perform app start-up wiring.

    Registers ``IContains`` as the ``search`` lookup on ``TextField`` when the
    default database engine string contains ``sqlite3``, then applies the
    python-docx monkey-patch.
    """
    default_engine = settings.DATABASES["default"]["ENGINE"]
    # NOTE(review): presumably sqlite needs this because it has no native
    # ``search`` lookup - confirm against the supported database backends.
    if "sqlite3" in default_engine:  # pragma: no cover
        TextField.register_lookup(IContains, lookup_name="search")

    # Applied regardless of the database backend in use.
    mitigate_docxtpl_corruption_bug()


def mitigate_docxtpl_corruption_bug():
    """Monkey-patch python-docx to avoid a duplicated core-properties part.

    This is basically a back-port of
    https://github.com/python-openxml/python-docx/pull/1436 and consists of
    three patches:

    * ``RELATIONSHIP_TYPE.CORE_PROPERTIES_OFFICEDOCUMENT`` is added,
    * ``OpcPackage._core_properties_part`` is replaced by a property that also
      looks for a part related through that relationship type before it
      creates a default core-properties part,
    * ``_Relationship.reltype`` gets a setter so the relationship found that
      way can be re-typed to ``RT.CORE_PROPERTIES``.

    The function is idempotent: once the patches are in place, further calls
    return immediately. This matters because it is invoked from
    ``AppConfig.ready()``, which Django may call more than once.

    Raises:
        Exception: if python-docx itself already defines
            ``CORE_PROPERTIES_OFFICEDOCUMENT``, i.e. the upstream fix has
            landed and this monkey-patch should be deleted.
    """
    # ``cast`` comes from the standard library instead of relying on
    # ``docx.opc.package`` happening to re-export it.
    from typing import cast

    # All third-party imports happen before anything is mutated, so a failing
    # import can never leave python-docx half-patched.
    from docx.opc.constants import RELATIONSHIP_TYPE
    from docx.opc.package import CorePropertiesPart, OpcPackage
    from docx.opc.rel import _Relationship

    applied_marker = "_dms_docxtpl_mitigation_applied"
    if getattr(RELATIONSHIP_TYPE, applied_marker, False):
        # Already patched by an earlier call. Without this guard the check
        # below would fire on the attribute we added ourselves.
        return

    if hasattr(RELATIONSHIP_TYPE, "CORE_PROPERTIES_OFFICEDOCUMENT"):  # pragma: no cover
        raise Exception(
            "The docxtpl mitigation is no longer required, please remove the monkeypatch code"
        )

    # Same alias python-docx uses internally for the relationship constants.
    RT = RELATIONSHIP_TYPE

    @property
    def _core_properties_part(self) -> CorePropertiesPart:
        """|CorePropertiesPart| object related to this package.

        Creates a default core properties part if one is not present (not common).
        """
        try:
            return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
        except KeyError:
            try:
                # Fall back to a part linked through the "officedocument"
                # flavour of the core-properties relationship type.
                office_document_part = self.part_related_by(
                    RT.CORE_PROPERTIES_OFFICEDOCUMENT  # type: ignore
                )
                rel = self.relate_to(
                    office_document_part,
                    RT.CORE_PROPERTIES_OFFICEDOCUMENT,  # type: ignore
                )
                # Re-type the relationship so later lookups succeed through
                # the regular ``RT.CORE_PROPERTIES`` path.
                self.rels[rel].reltype = RT.CORE_PROPERTIES
                return cast(CorePropertiesPart, office_document_part)
            except KeyError:
                core_properties_part = CorePropertiesPart.default(self)
                self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
                return core_properties_part

    def _set_reltype(self, value: str):
        self._reltype = value

    # ``property.setter`` returns a copy of the existing property that keeps
    # its getter and adds the given setter.
    writable_reltype = _Relationship.reltype.setter(_set_reltype)

    # Apply all patches together, then record that they are in place.
    RELATIONSHIP_TYPE.CORE_PROPERTIES_OFFICEDOCUMENT = (
        "http://schemas.openxmlformats.org/officedocument/2006/relationships"
        "/metadata/core-properties"
    )
    OpcPackage._core_properties_part = _core_properties_part
    _Relationship.reltype = writable_reltype
    setattr(RELATIONSHIP_TYPE, applied_marker, True)
Binary file not shown.
37 changes: 36 additions & 1 deletion document_merge_service/api/tests/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import json
import os
import re
from collections import namedtuple
import tempfile
import zipfile
from collections import Counter, namedtuple

import openpyxl
import pytest
Expand Down Expand Up @@ -916,3 +918,36 @@ def test_placeholder_with_unsupported_operand(
with pytest.raises(exceptions.ValidationError) as exc_info:
serializer.validate({"data": {"E_BAU_NUMBER": 12345}})
assert exc_info.value.args[0] == expected_error


def test_template_merge_docx_libreoffice_bug(
    db, client, mock_filefield_name_validation, template, snapshot
):
    """Verify a certain docx corruption bug does not occur.

    Certain versions of python-docx and python-docxtemplate cause corruption
    of files that were originally created with LibreOffice. One effect of that
    corruption is a duplicate entry in the document-internal files; there
    are two docProps/core.xml files in the resulting document.

    The test merges a LibreOffice-created template and asserts that no file
    name occurs more than once inside the resulting docx (zip) archive.
    """
    file = django_file("created_with_libreoffice.docx")
    template.template.save(os.path.basename(file.name), file)
    template.engine = "docx-template"
    template.save()
    url = reverse("template-merge", args=[template.pk])

    response = client.post(url, data={"data": {"test": "Test input"}}, format="json")

    # Fail with the server's answer instead of an opaque BadZipFile error
    # when the merge request itself did not succeed.
    assert response.status_code == 200, response.content

    with tempfile.NamedTemporaryFile(suffix=".docx") as tmp:
        tmp.write(response.content)
        tmp.flush()
        tmp.seek(0)

        # Hand the open file object to ZipFile rather than reopening it by
        # name (not portable while the temp file is still open), and close
        # the archive deterministically.
        with zipfile.ZipFile(tmp) as archive:
            name_counter = Counter(archive.namelist())

    problematic_names = {
        name: count for name, count in name_counter.items() if count > 1
    }

    assert problematic_names == {}, "Duplicate entry in docx file's internal structure"
Loading