diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f6824c..9c2a30f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ The version numbers are according to [Semantic Versioning](http://semver.org/). ### Removed + +## Release v0.2.3 (2024-10-29) +### Changed +- OCR provider-specific dependencies are now loaded lazily to reduce import time + ## Release v0.2.2 (2024-10-03) ### Added - Added OpenTelemetry to `GoogleAzureOCR`, `GoogleOCR`, `AzureOCR`, and `OcrWrapper` to enable tracing of the OCR process diff --git a/ocr_wrapper/aws.py b/ocr_wrapper/aws.py index 3fa5273..b5625a4 100644 --- a/ocr_wrapper/aws.py +++ b/ocr_wrapper/aws.py @@ -6,26 +6,8 @@ from .bbox import BBox from .ocr_wrapper import OcrCacheDisabled, OcrWrapper -try: - import boto3 -except ImportError: - _has_boto3 = False -else: - _has_boto3 = True - - -def requires_boto(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_boto3: - raise ImportError('AWS Textract requires missing "boto3" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - class AwsOCR(OcrWrapper): - @requires_boto def __init__( self, *, @@ -34,10 +16,19 @@ def __init__( add_checkboxes: bool = False, verbose: bool = False ): - super().__init__(cache_file=cache_file, max_size=max_size, add_checkboxes=add_checkboxes, verbose=verbose) + try: + import boto3 + except ImportError: + raise ImportError('AwsOCR requires missing "boto3" package.') + + super().__init__( + cache_file=cache_file, + max_size=max_size, + add_checkboxes=add_checkboxes, + verbose=verbose, + ) self.client = boto3.client("textract", region_name="eu-central-1") - @requires_boto def _get_ocr_response(self, img: Image.Image): """Gets the OCR response from AWS. 
Uses cached response if a cache file has been specified and the document has been OCRed already""" @@ -52,7 +43,6 @@ def _get_ocr_response(self, img: Image.Image): self._put_on_shelf(img, response) return response - @requires_boto def _convert_ocr_response(self, response) -> tuple[List[BBox], dict[str, Any]]: """Converts the response given by Google OCR to a list of BBox""" bboxes = [] diff --git a/ocr_wrapper/azure.py b/ocr_wrapper/azure.py index 1881d95..bd48559 100644 --- a/ocr_wrapper/azure.py +++ b/ocr_wrapper/azure.py @@ -13,33 +13,9 @@ from .ocr_wrapper import OcrCacheDisabled, OcrWrapper, Union from .utils import set_image_attributes -try: - from msrest.authentication import CognitiveServicesCredentials - from msrest.exceptions import ClientRequestError - - from azure.cognitiveservices.vision.computervision import ComputerVisionClient - from azure.cognitiveservices.vision.computervision.models import ( - ComputerVisionOcrErrorException, - OperationStatusCodes, - ) -except ImportError: - _has_azure = False -else: - _has_azure = True - tracer = trace.get_tracer(__name__) -def requires_azure(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_azure: - raise ImportError('Azure Read requires missing "azure-cognitiveservices-vision-computervision" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - - def _discretize_angle_to_90_deg(rotation: float) -> int: """Discretize an angle to the nearest 90 degrees""" return int(((rotation + 45) // 90 * 90) % 360) @@ -70,7 +46,6 @@ def _determine_endpoint_and_key(endpoint: Optional[str], key: Optional[str]) -> class AzureOCR(OcrWrapper): - @requires_azure def __init__( self, *, @@ -85,6 +60,11 @@ def __init__( add_qr_barcodes: bool = False, verbose: bool = False, ): + try: + from msrest.authentication import CognitiveServicesCredentials + from azure.cognitiveservices.vision.computervision import ComputerVisionClient + except ImportError: + raise ImportError('AzureOCR 
requires missing "azure-cognitiveservices-vision-computervision" package.') super().__init__( cache_file=cache_file, max_size=max_size, @@ -99,11 +79,19 @@ def __init__( endpoint, key = _determine_endpoint_and_key(endpoint, key) self.client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(key)) - @requires_azure @tracer.start_as_current_span(name="AzureOCR.get_ocr_response") def _get_ocr_response(self, img: Image.Image): """Gets the OCR response from the Azure. Uses cached response if a cache file has been specified and the document has been OCRed already""" + try: + from msrest.exceptions import ClientRequestError + from azure.cognitiveservices.vision.computervision.models import ( + ComputerVisionOcrErrorException, + OperationStatusCodes, + ) + except ImportError: + raise ImportError('AzureOCR requires missing "azure-cognitiveservices-vision-computervision" package.') + span = trace.get_current_span() set_image_attributes(span, img) @@ -158,7 +146,6 @@ def _get_ocr_response(self, img: Image.Image): self._put_on_shelf(img, read_result) return read_result - @requires_azure @tracer.start_as_current_span(name="AzureOCR._convert_ocr_response") def _convert_ocr_response(self, response) -> tuple[List[BBox], dict[str, Any]]: """Converts the response given by Azure Read to a list of BBox""" diff --git a/ocr_wrapper/easy_ocr.py b/ocr_wrapper/easy_ocr.py index 605ba83..525955c 100644 --- a/ocr_wrapper/easy_ocr.py +++ b/ocr_wrapper/easy_ocr.py @@ -9,26 +9,8 @@ from .bbox import BBox from .ocr_wrapper import OcrWrapper, OcrCacheDisabled -try: - import easyocr -except ImportError: - _has_easyocr = False -else: - _has_easyocr = True - - -def requires_easyocr(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_easyocr: - raise ImportError('Easy OCR requires missing "easyocr" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - class EasyOCR(OcrWrapper): - @requires_easyocr def __init__( self, *, @@ -46,6 +28,11 @@ 
def __init__( languages: A string or a list of languages to use for OCR from the list here: https://www.jaided.ai/easyocr/ width_thr: Distance where bounding boxes are still getting merged into one """ + try: + import easyocr + except ImportError: + raise ImportError('EasyOCR requires missing "easyocr" package.') + super().__init__( cache_file=cache_file, max_size=max_size, @@ -58,7 +45,6 @@ def __init__( self.client = easyocr.Reader(self.languages, **kwargs) - @requires_easyocr def _get_ocr_response(self, img: Image.Image): """Gets the OCR response from EasyOCR. Uses a cached response if a cache file has been specified and the document has been OCRed already""" @@ -69,7 +55,6 @@ def _get_ocr_response(self, img: Image.Image): self._put_on_shelf(img, response) return response - @requires_easyocr def _convert_ocr_response(self, response) -> tuple[List[BBox], dict[str, Any]]: """Converts the response given by EasyOCR to a list of BBox""" bboxes, confidences = [], [] diff --git a/ocr_wrapper/google_azure_ocr.py b/ocr_wrapper/google_azure_ocr.py index 543d0d4..57511f3 100644 --- a/ocr_wrapper/google_azure_ocr.py +++ b/ocr_wrapper/google_azure_ocr.py @@ -295,7 +295,6 @@ def __init__(self, bboxes: list[BBox]): for i, bbox in enumerate(bboxes): self.rtree.insert(i, bbox.get_shapely_polygon().bounds) - @tracer.start_as_current_span(name="BBoxOverlapChecker.get_overlapping_bboxes") def get_overlapping_bboxes(self, bbox: BBox, threshold: float = 0.01) -> list[BBox]: """Returns the bboxes that overlap with the given bbox. 
@@ -318,7 +317,6 @@ def get_overlapping_bboxes(self, bbox: BBox, threshold: float = 0.01) -> list[BB ): overlapping_bboxes.append(self.bboxes[i]) - span.set_attribute("nb_overlapping_bboxes", len(overlapping_bboxes)) return overlapping_bboxes diff --git a/ocr_wrapper/google_document_ocr_checkbox_detector.py b/ocr_wrapper/google_document_ocr_checkbox_detector.py index a11b6ec..9e87a7e 100644 --- a/ocr_wrapper/google_document_ocr_checkbox_detector.py +++ b/ocr_wrapper/google_document_ocr_checkbox_detector.py @@ -1,8 +1,7 @@ from __future__ import annotations -import functools import os -from typing import Optional, Union +from typing import TYPE_CHECKING, Optional, Union from PIL import Image @@ -11,25 +10,9 @@ from .utils import resize_image -try: - from google.api_core.client_options import ClientOptions +# Load the Google Cloud Document AI client library globally only for type checking (needed for argument types) +if TYPE_CHECKING: from google.cloud import documentai -except ImportError: - _has_gcloud = False -else: - _has_gcloud = True - - -def requires_gcloud(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_gcloud: - raise ImportError( - "GoogleDocumentOcrCheckboxDetector OCR requires missing 'google-cloud-documentai' package." 
- ) - return fn(*args, **kwargs) - - return wrapper_decocator def _val_or_env(val: Optional[str], env: str, default: Optional[str] = None) -> Optional[str]: @@ -62,7 +45,6 @@ def _visual_element_to_bbox(visual_element) -> tuple[BBox, float]: class GoogleDocumentOcrCheckboxDetector: - @requires_gcloud def __init__( self, project_id: Optional[str] = None, @@ -71,6 +53,12 @@ def __init__( processor_version: Optional[str] = None, max_size: Optional[int] = 4096, ): + try: + from google.api_core.client_options import ClientOptions + from google.cloud import documentai + except ImportError: + raise ImportError("GoogleDocumentOcrCheckboxDetector requires missing 'google-cloud-documentai' package.") + self.project_id = _val_or_env(project_id, "GOOGLE_DOC_OCR_PROJECT_ID") self.location = _val_or_env(location, "GOOGLE_DOC_OCR_LOCATION", default="eu") self.processor_id = _val_or_env(processor_id, "GOOGLE_DOC_OCR_PROCESSOR_ID") @@ -105,8 +93,12 @@ def __init__( self.project_id, self.location, self.processor_id, self.processor_version ) - @requires_gcloud def detect_checkboxes(self, page: Union[Image.Image, documentai.RawDocument]) -> tuple[list[BBox], list[float]]: + try: + from google.cloud import documentai + except ImportError: + raise ImportError("GoogleDocumentOcrCheckboxDetector requires missing 'google-cloud-documentai' package.") + if isinstance(page, Image.Image): if self.max_size is not None: page = resize_image(img=page, max_size=self.max_size) diff --git a/ocr_wrapper/google_ocr.py b/ocr_wrapper/google_ocr.py index a9a39f3..f2f8dd7 100644 --- a/ocr_wrapper/google_ocr.py +++ b/ocr_wrapper/google_ocr.py @@ -1,6 +1,5 @@ from __future__ import annotations -import functools import os import time from time import sleep @@ -15,24 +14,6 @@ tracer = trace.get_tracer(__name__) -try: - from google.cloud import vision -except ImportError: - _has_gcloud = False -else: - _has_gcloud = True - - -def requires_gcloud(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, 
**kwargs): - if not _has_gcloud: - raise ImportError('Google OCR requires missing "google-cloud-vision" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - - # Define a list of languages which are written from right to left. This is needed to determine the rotation of the document rtl_languages = [ "ar", @@ -183,7 +164,6 @@ class GoogleOCR(OcrWrapper): Google Cloud Vision API. """ - @requires_gcloud def __init__( self, *, @@ -197,6 +177,11 @@ def __init__( add_qr_barcodes: bool = False, verbose: bool = False, ): + try: + from google.cloud import vision + except ImportError: + raise ImportError('GoogleOCR requires missing "google-cloud-vision" package.') + super().__init__( cache_file=cache_file, max_size=max_size, @@ -219,11 +204,15 @@ def __init__( self.endpoint = endpoint self.client = vision.ImageAnnotatorClient(client_options={"api_endpoint": self.endpoint}) - @requires_gcloud @tracer.start_as_current_span(name="GoogleOCR._get_ocr") def _get_ocr_response(self, img: Image.Image): """Gets the OCR response from the Google cloud. 
Uses cached response if a cache file has been specified and the document has been OCRed already""" + try: + from google.cloud import vision + except ImportError: + raise ImportError('GoogleOCR requires missing "google-cloud-vision" package.') + span = trace.get_current_span() set_image_attributes(span, img) @@ -257,7 +246,6 @@ def _get_ocr_response(self, img: Image.Image): self._put_on_shelf(img, response) return response - @requires_gcloud @tracer.start_as_current_span(name="GoogleOCR._convert_ocr_response") def _convert_ocr_response(self, response) -> tuple[List[BBox], dict[str, Any]]: """Converts the response given by Google OCR to a list of BBox""" diff --git a/ocr_wrapper/paddleocr.py b/ocr_wrapper/paddleocr.py index 74179d4..da70a4d 100644 --- a/ocr_wrapper/paddleocr.py +++ b/ocr_wrapper/paddleocr.py @@ -7,26 +7,8 @@ from .bbox import BBox from .ocr_wrapper import OcrCacheDisabled, OcrWrapper -try: - import paddleocr -except ImportError: - _has_paddle = False -else: - _has_paddle = True - - -def requires_paddle(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_paddle: - raise ImportError('PaddleOCR requires missing "paddleocr" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - class PaddleOCR(OcrWrapper): - @requires_paddle def __init__( self, *, @@ -36,6 +18,11 @@ def __init__( add_qr_barcodes: bool = False, verbose: bool = False ): + try: + import paddleocr + except ImportError: + raise ImportError('PaddleOCR requires missing "paddleocr" package.') + super().__init__( cache_file=cache_file, max_size=max_size, @@ -64,7 +51,6 @@ def _resize(img: Image.Image) -> Tuple[Image.Image, float]: else: return img, 1.0 - @requires_paddle def _get_ocr_response(self, img: Image.Image): # Try to get cached response response = self._get_from_shelf(img) @@ -76,7 +62,6 @@ def _get_ocr_response(self, img: Image.Image): self._put_on_shelf(img, response) return response - @requires_paddle def _convert_ocr_response(self, 
response) -> tuple[List[BBox], dict[str, Any]]: """Converts the response given by Google OCR to a list of BBox""" paddle_resp, resize_ratio = response diff --git a/ocr_wrapper/qr_barcodes.py b/ocr_wrapper/qr_barcodes.py index 77fdd58..af4ec19 100644 --- a/ocr_wrapper/qr_barcodes.py +++ b/ocr_wrapper/qr_barcodes.py @@ -4,28 +4,25 @@ from ocr_wrapper.bbox import BBox -try: - from pyzbar import pyzbar -except ImportError: - _has_pyzbar = False -else: - _has_pyzbar = True - - -def requires_paddle(fn): - @functools.wraps(fn) - def wrapper_decocator(*args, **kwargs): - if not _has_pyzbar: - raise ImportError('QR and barcode detection and decoding requires missing "pyzbar" package.') - return fn(*args, **kwargs) - - return wrapper_decocator - def _decoded_to_coordinate_list(decoded) -> list[float]: """Takes a Decoded object and returns a list of coordinates in the format [TLx, TLy, TRx, TRy, BRx, BRy, BLx, BLy]""" - left, top, width, height = decoded.rect.left, decoded.rect.top, decoded.rect.width, decoded.rect.height - return [left, top, left + width, top, left + width, top + height, left, top + height] + left, top, width, height = ( + decoded.rect.left, + decoded.rect.top, + decoded.rect.width, + decoded.rect.height, + ) + return [ + left, + top, + left + width, + top, + left + width, + top + height, + left, + top + height, + ] def _decoded_to_ocr_text(decoded) -> str: @@ -65,9 +62,12 @@ def _detect_raw_qr_barcodes(image: Image.Image) -> list: return decoded_objects -@requires_paddle def detect_qr_barcodes(image: Image.Image) -> list[BBox]: """Detects barcodes in an image and returns a list of BBox objects""" + try: + from pyzbar import pyzbar + except ImportError: + raise ImportError('QR and barcode detection and decoding requires missing "pyzbar" package.') decoded_objects = _detect_raw_qr_barcodes(image) bboxes = [_decoded_to_bbox(decoded) for decoded in decoded_objects] # Normalize the BBoxes so they are not in pixel coordinates anymore diff --git a/pyproject.toml 
b/pyproject.toml index 0a1511b..4e1b5d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ocr_wrapper" -version = "0.2.2" +version = "0.2.3" license.file = "LICENSE.md" authors = [{ name = "DeepOpinion", email = "hello@deepopinion.ai" }] urls.repository = "https://github.com/deepopinion/ocr_wrapper"