From 2fdac319b112d677ed05287db5a97c7915f2e4ee Mon Sep 17 00:00:00 2001 From: Gmgge <48548141+Gmgge@users.noreply.github.com> Date: Thu, 21 Sep 2023 14:51:32 +0800 Subject: [PATCH] =?UTF-8?q?fix:=E4=BF=AE=E5=A4=8D=E9=80=9A=E9=81=93?= =?UTF-8?q?=E6=95=B0=E4=B8=8D=E5=8C=B9=E9=85=8D=E9=80=A0=E6=88=90=E7=9A=84?= =?UTF-8?q?PPOCRLabel=E5=90=AF=E5=8A=A8=E5=A4=B1=E8=B4=A5=E9=97=AE?= =?UTF-8?q?=E9=A2=98#10748,=E6=A0=B9=E6=8D=AE=E6=9B=B4=E6=96=B0=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E5=8F=91=E7=8E=B0#10655=EF=BC=8C=E7=94=B1=E4=BA=8Epad?= =?UTF-8?q?dleocr=E4=B8=AD=E5=A2=9E=E5=8A=A0=E4=BA=86=E5=AF=B9=E9=80=8F?= =?UTF-8?q?=E6=98=8E=E8=89=B2=E7=9A=84=E9=9C=80=E6=B1=82=EF=BC=8C=E7=94=B1?= =?UTF-8?q?=E4=BA=8E=E5=9C=A8check=5Fimg=E4=B8=AD=E4=BB=A5=E5=8F=8A?= =?UTF-8?q?=E5=AD=98=E5=9C=A8=E5=AF=B9=E5=8D=95=E9=80=9A=E9=81=93=E5=9B=BE?= =?UTF-8?q?=E5=83=8F=E8=BD=AC=E4=B8=89=E9=80=9A=E9=81=93=E5=9B=BE=E5=83=8F?= =?UTF-8?q?=E7=9A=84=E5=A4=84=E7=90=86=EF=BC=8C=E5=9B=A0=E6=AD=A4=E5=B0=86?= =?UTF-8?q?=E8=AF=A5=E5=9B=9B=E9=80=9A=E9=81=93=E5=9B=BE=E5=83=8F=E5=A4=84?= =?UTF-8?q?=E7=90=86=E6=88=90=E4=B8=89=E9=80=9A=E9=81=93=E7=9A=84=E5=9B=BE?= =?UTF-8?q?=E5=83=8F=E4=B9=9F=E6=94=BE=E5=85=A5=E8=AF=A5check=5Fimg?= =?UTF-8?q?=EF=BC=8C=E5=B9=B6=E7=BB=9F=E4=B8=80=E4=B8=89=E9=80=9A=E9=81=93?= =?UTF-8?q?=E5=9B=BE=E5=83=8F=E8=BE=93=E5=87=BA=E9=80=BB=E8=BE=91=E3=80=82?= =?UTF-8?q?=20(#10847)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add:由于函数复杂度增高,增加注释。 --- paddleocr.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index dc92cbf6b7..36980aec44 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -516,7 +516,19 @@ def img_decode(content: bytes): return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED) -def check_img(img): +def check_img(img, alpha_color=(255, 255, 255)): + """ + Check the image data. If it is another type of image file, try to decode it into a numpy array. + The inference network requires three-channel images, So the following channel conversions are done + single channel image: Gray to RGB R←Y,G←Y,B←Y + four channel image: alpha_to_color + args: + img: image data + file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats + storage type: binary image, net image file, local image file + alpha_color: Background color in images in RGBA format + return: numpy.array (h, w, 3) + """ if isinstance(img, bytes): img = img_decode(img) if isinstance(img, str): @@ -550,9 +562,12 @@ def check_img(img): if img is None: logger.error("error in loading image:{}".format(image_file)) return None + # single channel image array.shape:h,w if isinstance(img, np.ndarray) and len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - + # four channel image array.shape:h,w,c + if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4: + img = alpha_to_color(img, alpha_color) return img @@ -638,7 +653,7 @@ def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_col 'Since the angle classifier is not initialized, it will not be used during the forward process' ) - img = check_img(img) + img = check_img(img, alpha_color) # for infer pdf file if isinstance(img, list): if self.page_num > len(img) or self.page_num == 0: @@ -648,7 +663,6 @@ def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_col imgs = [img] def preprocess_image(_image): - _image = alpha_to_color(_image, alpha_color) if inv: _image = cv2.bitwise_not(_image) if bin: @@ -755,8 +769,8 @@ def __init__(self, **kwargs): logger.debug(params) super().__init__(params) - def __call__(self, img, return_ocr_result_in_table=False, img_idx=0): - img = check_img(img) + def __call__(self, img, return_ocr_result_in_table=False, img_idx=0, alpha_color=(255, 255, 255)): + img = check_img(img, alpha_color) res, _ = super().__call__( img, return_ocr_result_in_table, img_idx=img_idx) return res