Skip to content

Commit

Permalink
Refactors the image compression method in the OCR wrappers and also allows…
Browse files Browse the repository at this point in the history
… using WebP instead of PNG

Also sets GoogleOCR to use webp by default
  • Loading branch information
Paethon committed Nov 2, 2023
1 parent b5db69f commit b83e04b
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The version numbers are according to [Semantic Versioning](http://semver.org/).
- Adds an environment variable `OCR_WRAPPER_CACHE_FILE` to specify an ocr cache file globally

### Changed

- Changed GoogleOCR to use WebP instead of PNG when transferring images to the cloud (reduces the amount of transferred data by roughly half)
### Fixed
- Adds forced conversion to RGB in pillow before sending data to OpenCV to fix a possible bug in Studio
- Fixes a rare bug where self-intersecting bounding boxes caused the OCR system to crash when using multi-pass OCR
Expand Down
2 changes: 1 addition & 1 deletion ocr_wrapper/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _get_ocr_response(self, img: Image.Image):
"""Gets the OCR response from AWS. Uses cached response if a cache file has been specified and the
document has been OCRed already"""
# Pack image in correct format
img_bytes = self._pil_img_to_png(img)
img_bytes = self._pil_img_to_compressed(img)

# Try to get cached response
response = self._get_from_shelf(img)
Expand Down
2 changes: 1 addition & 1 deletion ocr_wrapper/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _get_ocr_response(self, img: Image.Image):
"""Gets the OCR response from Azure. Uses cached response if a cache file has been specified and the
document has been OCRed already"""
# Pack image in correct format
img_bytes = self._pil_img_to_png(img)
img_bytes = self._pil_img_to_compressed(img)
img_stream = BytesIO(img_bytes)

# Try to get cached response
Expand Down
2 changes: 1 addition & 1 deletion ocr_wrapper/google_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def _get_ocr_response(self, img: Image.Image):
"""Gets the OCR response from the Google cloud. Uses cached response if a cache file has been specified and the
document has been OCRed already"""
# Pack image in correct format
img_bytes = self._pil_img_to_png(img)
img_bytes = self._pil_img_to_compressed(img, compression="webp")
vision_img = vision.Image(content=img_bytes)

response = self._get_from_shelf(img) # Try to get cached response
Expand Down
16 changes: 11 additions & 5 deletions ocr_wrapper/ocr_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,16 @@ def draw(image: Image.Image, boxes: list[BBox], texts: list[str]):
return image, " ".join(all_text)

@staticmethod
def _pil_img_to_png(image: Image.Image) -> bytes:
"""Converts a pil image to png in memory"""
def _pil_img_to_compressed(image: Image.Image, compression: str = "png") -> bytes:
"""Converts a pil image to "compressed" image (e.g. png, webp) in memory"""
with BytesIO() as output:
image.save(output, "PNG")
if compression.lower() == "png":
image.save(output, "PNG", compress_level=5)
elif compression.lower() == "webp":
image.save(output, "WebP", lossless=True, quality=0)
else:
raise Exception(f"Unsupported compression: {compression}")

output.seek(0)
return output.read()

Expand All @@ -178,7 +184,7 @@ def _get_from_shelf(self, img: Image.Image):
if self.cache_file is not None and os.path.exists(self.cache_file):
with self.shelve_mutex:
with shelve.open(self.cache_file, "r") as db:
img_bytes = self._pil_img_to_png(img)
img_bytes = self._pil_img_to_compressed(img)
img_hash = self._get_bytes_hash(img_bytes)
if img_hash in db.keys(): # We have a cached version
if self.verbose:
Expand All @@ -190,6 +196,6 @@ def _put_on_shelf(self, img: Image.Image, response):
if self.cache_file is not None:
with self.shelve_mutex:
with shelve.open(self.cache_file, "c") as db:
img_bytes = self._pil_img_to_png(img)
img_bytes = self._pil_img_to_compressed(img)
img_hash = self._get_bytes_hash(img_bytes)
db[img_hash] = response

0 comments on commit b83e04b

Please sign in to comment.