From 6fdce04634f341eb491b1c5a33161e7d3734ef54 Mon Sep 17 00:00:00 2001
From: Eric Guo <2364319479@qq.com>
Date: Mon, 15 Apr 2024 10:52:43 +0800
Subject: [PATCH] Update quickstart.md (#11927)

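Fix issues in the PDF example:
1. getPixmap() is not recognized; change it to get_pixmap()
2. Fix the TypeError raised when PaddleOCR recognizes an empty page
3. Store the page count up front (PAGE_NUM) instead of reading pdf.pageCount, to avoid issues
4. Add a commented-out example of GPU usage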
---
 doc/doc_ch/quickstart.md | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md
index 0600d1642c..b802c5eed5 100644
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -211,39 +211,44 @@ from paddleocr import PaddleOCR, draw_ocr
 
 # Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
 # 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
-ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2)  # need to run only once to download and load model into memory
-img_path = './xxx.pdf'
-result = ocr.ocr(img_path, cls=True)
+PAGE_NUM = 10  # 将识别页码前置作为全局，防止后续打开pdf的参数和前文识别参数不一致 / Number of PDF pages to recognize
+pdf_path = 'default.pdf'
+ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM)  # need to run only once to download and load model into memory
+# ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM, use_gpu=True)  # 如果需要使用GPU，请取消此行的注释 并注释上一行 / To use the GPU, uncomment this line and comment out the one above.
+result = ocr.ocr(pdf_path, cls=True)
 for idx in range(len(result)):
     res = result[idx]
+    if res is None:  # 识别到空页就跳过，防止程序报错 / Skip empty pages: their result is None, and iterating it raises TypeError
+        print(f"[DEBUG] Empty page {idx+1} detected, skip it.")
+        continue
     for line in res:
         print(line)
-
 # 显示结果
 import fitz
 from PIL import Image
 import cv2
 import numpy as np
 imgs = []
-with fitz.open(img_path) as pdf:
-    for pg in range(0, pdf.pageCount):
+with fitz.open(pdf_path) as pdf:
+    for pg in range(len(result)):  # render one image per recognized page, so a PDF shorter than PAGE_NUM cannot raise IndexError
         page = pdf[pg]
         mat = fitz.Matrix(2, 2)
-        pm = page.getPixmap(matrix=mat, alpha=False)
+        pm = page.get_pixmap(matrix=mat, alpha=False)
         # if width or height > 2000 pixels, don't enlarge the image
         if pm.width > 2000 or pm.height > 2000:
-            pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
-
+            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
         img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
         img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
         imgs.append(img)
 for idx in range(len(result)):
     res = result[idx]
+    if res is None:  # empty page: there is nothing to draw
+        continue
     image = imgs[idx]
     boxes = [line[0] for line in res]
     txts = [line[1][0] for line in res]
     scores = [line[1][1] for line in res]
-    im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+    im_show = draw_ocr(image, boxes, txts, scores, font_path='simfang.ttf')
     im_show = Image.fromarray(im_show)
     im_show.save('result_page_{}.jpg'.format(idx))
 ```