Skip to content

Commit

Permalink
fix:修复通道数不匹配造成的PPOCRLabel启动失败问题#10748,根据更新日志发现#10655,由于paddleocr中增加了对…
Browse files Browse the repository at this point in the history
…透明色的需求,由于在check_img中已经存在对单通道图像转三通道图像的处理,因此将该四通道图像处理成三通道的图像也放入该check_img,并统一三通道图像输出逻辑。 (#10847)

add:由于函数复杂度增高,增加注释。
  • Loading branch information
Gmgge authored Sep 21, 2023
1 parent ebc67db commit 2fdac31
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions paddleocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,19 @@ def img_decode(content: bytes):
return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)


def check_img(img):
def check_img(img, alpha_color=(255, 255, 255)):
"""
Check the image data. If it is another type of image file, try to decode it into a numpy array.
The inference network requires three-channel images, So the following channel conversions are done
single channel image: Gray to RGB R←Y,G←Y,B←Y
four channel image: alpha_to_color
args:
img: image data
file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats
storage type: binary image, net image file, local image file
alpha_color: Background color in images in RGBA format
return: numpy.array (h, w, 3)
"""
if isinstance(img, bytes):
img = img_decode(img)
if isinstance(img, str):
Expand Down Expand Up @@ -550,9 +562,12 @@ def check_img(img):
if img is None:
logger.error("error in loading image:{}".format(image_file))
return None
# single channel image array.shape:h,w
if isinstance(img, np.ndarray) and len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

# four channel image array.shape:h,w,c
if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4:
img = alpha_to_color(img, alpha_color)
return img


Expand Down Expand Up @@ -638,7 +653,7 @@ def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_col
'Since the angle classifier is not initialized, it will not be used during the forward process'
)

img = check_img(img)
img = check_img(img, alpha_color)
# for infer pdf file
if isinstance(img, list):
if self.page_num > len(img) or self.page_num == 0:
Expand All @@ -648,7 +663,6 @@ def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_col
imgs = [img]

def preprocess_image(_image):
_image = alpha_to_color(_image, alpha_color)
if inv:
_image = cv2.bitwise_not(_image)
if bin:
Expand Down Expand Up @@ -755,8 +769,8 @@ def __init__(self, **kwargs):
logger.debug(params)
super().__init__(params)

def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
img = check_img(img)
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0, alpha_color=(255, 255, 255)):
img = check_img(img, alpha_color)
res, _ = super().__call__(
img, return_ocr_result_in_table, img_idx=img_idx)
return res
Expand Down

0 comments on commit 2fdac31

Please sign in to comment.