[FIX] odoo: zlib incorrect data check

PDF files with badly compressed data can raise zlib errors when the Odoo banner is added in the corner (Original Bills). These files are readable, but PyPDF2 fails with a traceback when it tries to decompress the data. If we decompress the data byte by byte and ignore the zlib -3 error ("incorrect data check"), the resulting file seems to be identical to the source file (aside from the Odoo banner). Largely inspired by py-pdf/pypdf#422. opw-3151302
odoo-dev · Mar 22, 2023 · eeabefd · eeabefd
1 parent 4b023b1
commit eeabefd
Showing 1 changed file with 21 additions and 1 deletion.
diff --git a/odoo/__init__.py b/odoo/__init__.py
@@ -71,15 +71,35 @@ def gevent_wait_callback(conn, timeout=None):
 # ensure that zlib does not throw error -5 when decompressing
 # because some pdf won't fit into allocated memory
 # https://docs.python.org/3/library/zlib.html#zlib.decompressobj
+# If zlib throws a -3 error (incorrect data check), try to
+# decompress as much as possible and ignore the error.
 # ----------------------------------------------------------
 import PyPDF2
 
 try:
     import zlib
+    from io import BytesIO
+
+    def _decompress_corrupted(data):
+        zobj = zlib.decompressobj()
+        f = BytesIO(data)
+        result_data = b''
+        buffer = f.read(1)
+        try:
+            while buffer:
+                result_data += zobj.decompress(buffer)
+                buffer = f.read(1)
+        except zlib.error as e:
+            if e.args[0] != 'Error -3 while decompressing data: incorrect data check':
+                raise e
+        return result_data
 
     def _decompress(data):
         zobj = zlib.decompressobj()
-        return zobj.decompress(data)
+        try:
+            return zobj.decompress(data)
+        except zlib.error:
+            return _decompress_corrupted(data)
 
     PyPDF2.filters.decompress = _decompress
 except ImportError: