Refactors the image compression method in the OCR wrappers and also allows using WebP instead of PNG.

Also sets GoogleOCR to use WebP by default.
deepopinion · Nov 2, 2023 · b83e04b · b83e04b
1 parent b5db69f
commit b83e04b
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,7 +9,7 @@ The version numbers are according to [Semantic Versioning](http://semver.org/).
 - Adds an environment variable `OCR_WRAPPER_CACHE_FILE` to specify an ocr cache file globally
 
 ### Changed
-
+- Changed GoogleOCR to use WebP instead of PNG to transfer images to the cloud (reduces amount of transferred data by ~ 1/2)
 ### Fixed
 - Adds forced conversion to RGB in pillow before sending data to OpenCV to fix a possible bug in Studio
 - Fixes a rare bug where self-intersecting bounding boxes caused the OCR system to crash when using multi-pass OCR

diff --git a/ocr_wrapper/aws.py b/ocr_wrapper/aws.py
@@ -48,7 +48,7 @@ def _get_ocr_response(self, img: Image.Image):
         """Gets the OCR response from AWS. Uses cached response if a cache file has been specified and the
         document has been OCRed already"""
         # Pack image in correct format
-        img_bytes = self._pil_img_to_png(img)
+        img_bytes = self._pil_img_to_compressed(img)
 
         # Try to get cached response
         response = self._get_from_shelf(img)

diff --git a/ocr_wrapper/azure.py b/ocr_wrapper/azure.py
@@ -62,7 +62,7 @@ def _get_ocr_response(self, img: Image.Image):
         """Gets the OCR response from the Azure. Uses cached response if a cache file has been specified and the
         document has been OCRed already"""
         # Pack image in correct format
-        img_bytes = self._pil_img_to_png(img)
+        img_bytes = self._pil_img_to_compressed(img)
         img_stream = BytesIO(img_bytes)
 
         # Try to get cached response

diff --git a/ocr_wrapper/google_ocr.py b/ocr_wrapper/google_ocr.py
@@ -182,7 +182,7 @@ def _get_ocr_response(self, img: Image.Image):
         """Gets the OCR response from the Google cloud. Uses cached response if a cache file has been specified and the
         document has been OCRed already"""
         # Pack image in correct format
-        img_bytes = self._pil_img_to_png(img)
+        img_bytes = self._pil_img_to_compressed(img, compression="webp")
         vision_img = vision.Image(content=img_bytes)
 
         response = self._get_from_shelf(img)  # Try to get cached response

diff --git a/ocr_wrapper/ocr_wrapper.py b/ocr_wrapper/ocr_wrapper.py
@@ -158,10 +158,16 @@ def draw(image: Image.Image, boxes: list[BBox], texts: list[str]):
         return image, " ".join(all_text)
 
     @staticmethod
-    def _pil_img_to_png(image: Image.Image) -> bytes:
-        """Converts a pil image to png in memory"""
+    def _pil_img_to_compressed(image: Image.Image, compression: str = "png") -> bytes:
+        """Converts a pil image to "compressed" image (e.g. png, webp) in memory"""
         with BytesIO() as output:
-            image.save(output, "PNG")
+            if compression.lower() == "png":
+                image.save(output, "PNG", compress_level=5)
+            elif compression.lower() == "webp":
+                image.save(output, "WebP", lossless=True, quality=0)
+            else:
+                raise Exception(f"Unsupported compression: {compression}")
+
             output.seek(0)
             return output.read()
 
@@ -178,7 +184,7 @@ def _get_from_shelf(self, img: Image.Image):
         if self.cache_file is not None and os.path.exists(self.cache_file):
             with self.shelve_mutex:
                 with shelve.open(self.cache_file, "r") as db:
-                    img_bytes = self._pil_img_to_png(img)
+                    img_bytes = self._pil_img_to_compressed(img)
                     img_hash = self._get_bytes_hash(img_bytes)
                     if img_hash in db.keys():  # We have a cached version
                         if self.verbose:
@@ -190,6 +196,6 @@ def _put_on_shelf(self, img: Image.Image, response):
         if self.cache_file is not None:
             with self.shelve_mutex:
                 with shelve.open(self.cache_file, "c") as db:
-                    img_bytes = self._pil_img_to_png(img)
+                    img_bytes = self._pil_img_to_compressed(img)
                     img_hash = self._get_bytes_hash(img_bytes)
                     db[img_hash] = response