Skip to content

Commit

Permalink
cla_email
Browse files (browse the repository at this point in the history)
  • Loading branch information
liuhongen1234567 committed Jul 22, 2024
1 parent beb1dde commit ac5d6c1
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 34 deletions.
5 changes: 2 additions & 3 deletions configs/rec/rec_latex_ocr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Global:
infer_img: doc/datasets/pme_demo/0000013.png
infer_mode: False
use_space_char: False
fast_tokenizer_file: ppocr/utils/dict/latex_ocr_tokenizer.json
rec_char_dict_path: ppocr/utils/dict/latex_ocr_tokenizer.json
save_res_path: ./output/rec/predicts_latexocr.txt

Optimizer:
Expand Down Expand Up @@ -46,7 +46,6 @@ Architecture:
Head:
name: LaTeXOCRHead
pad_value: 0
ignore_index: -100
is_export: False
decoder_args:
attn_on_attn: True
Expand All @@ -60,7 +59,7 @@ Loss:

PostProcess:
name: LaTeXOCRDecode
fast_tokenizer_file: ppocr/utils/dict/latex_ocr_tokenizer.json
rec_char_dict_path: ppocr/utils/dict/latex_ocr_tokenizer.json

Metric:
name: LaTeXOCRMetric
Expand Down
4 changes: 2 additions & 2 deletions doc/doc_ch/algorithm_rec_latex_ocr.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs

- 默认每训练22个epoch(60000次iteration)进行1次评估,若您更改训练的batch_size,或更换数据集,请在训练时作出如下修改
```
python3 tools/train.py -c configs/rec/rec_latex_ocr.yml -o Global.eval_batch_step=[0, {length_of_dataset//batch_size*22}]
python3 tools/train.py -c configs/rec/rec_latex_ocr.yml -o Global.eval_batch_step=[0,{length_of_dataset//batch_size*22}]
```

<a name="3-2"></a>
Expand Down Expand Up @@ -115,7 +115,7 @@ python3 tools/export_model.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrai
# 目前的静态图模型支持的最大输出长度为512
```
**注意:**
- 如果您是在自己的数据集上训练的模型,并且调整了字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件
- 如果您是在自己的数据集上训练的模型,并且调整了字典文件,请检查配置文件中的`rec_char_dict_path`是否为所需要的字典文件
- [转换后模型下载地址](https://paddleocr.bj.bcebos.com/contribution/rec_latex_ocr_infer.tar)

转换成功后,在目录下有三个文件:
Expand Down
4 changes: 2 additions & 2 deletions ppocr/data/imaug/label_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,10 +1777,10 @@ def encodech(self, text):
class LatexOCRLabelEncode(object):
def __init__(
self,
fast_tokenizer_file,
rec_char_dict_path,
**kwargs,
):
self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
self.pad_token_id = 0
self.bos_token_id = 1
Expand Down
29 changes: 12 additions & 17 deletions ppocr/data/imaug/latex_ocr_aug.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@
import math
import cv2
import numpy as np
import albumentations as alb
import albumentations as A
from PIL import Image


class LatexTrainTransform:
def __init__(self, bitmap_prob=0.04, **kwargs):
# your init code
self.bitmap_prob = bitmap_prob
self.train_transform = alb.Compose(
self.train_transform = A.Compose(
[
alb.Compose(
A.Compose(
[
alb.ShiftScaleRotate(
A.ShiftScaleRotate(
shift_limit=0,
scale_limit=(-0.15, 0),
rotate_limit=1,
Expand All @@ -46,7 +46,7 @@ def __init__(self, bitmap_prob=0.04, **kwargs):
value=[255, 255, 255],
p=1,
),
alb.GridDistortion(
A.GridDistortion(
distort_limit=0.1,
border_mode=0,
interpolation=3,
Expand All @@ -56,13 +56,11 @@ def __init__(self, bitmap_prob=0.04, **kwargs):
],
p=0.15,
),
alb.RGBShift(
r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3
),
alb.GaussNoise(10, p=0.2),
alb.RandomBrightnessContrast(0.05, (-0.2, 0), True, p=0.2),
alb.ImageCompression(95, p=0.3),
alb.ToGray(always_apply=True),
A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3),
A.GaussNoise(10, p=0.2),
A.RandomBrightnessContrast(0.05, (-0.2, 0), True, p=0.2),
A.ImageCompression(95, p=0.3),
A.ToGray(always_apply=True),
]
)

Expand All @@ -71,17 +69,16 @@ def __call__(self, data):
if np.random.random() < self.bitmap_prob:
img[img != 255] = 0
img = self.train_transform(image=img)["image"]
# print(img.shape)
data["image"] = img
return data


class LatexTestTransform:
def __init__(self, **kwargs):
# your init code
self.test_transform = alb.Compose(
self.test_transform = A.Compose(
[
alb.ToGray(always_apply=True),
A.ToGray(always_apply=True),
]
)

Expand Down Expand Up @@ -170,11 +167,9 @@ def __init__(self, **kwargs):

def __call__(self, data):
img = data["image"]
# H, W, C
im_h, im_w = img.shape[:2]
divide_h = math.ceil(im_h / 16) * 16
divide_w = math.ceil(im_w / 16) * 16
# print(img.shape, "pad_shape")
img = img[:, :, 0]
img = np.pad(
img, ((0, divide_h - im_h), (0, divide_w - im_w)), constant_values=(1, 1)
Expand Down
4 changes: 2 additions & 2 deletions ppocr/data/latexocr_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def __init__(self, config, mode, logger, seed=None):
self.batchsize = dataset_config.pop("batch_size_per_pair")
self.keep_smaller_batches = dataset_config.pop("keep_smaller_batches")
self.max_seq_len = global_config.pop("max_seq_len")
self.fast_tokenizer_file = global_config.pop("fast_tokenizer_file")
self.tokenizer = LatexOCRLabelEncode(self.fast_tokenizer_file)
self.rec_char_dict_path = global_config.pop("rec_char_dict_path")
self.tokenizer = LatexOCRLabelEncode(self.rec_char_dict_path)

file = open(pkl_path, "rb")
data = pickle.load(file)
Expand Down
2 changes: 0 additions & 2 deletions ppocr/modeling/heads/rec_latexocr_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,6 @@ def __init__(
net=None,
in_channels=256,
out_channels=256,
ignore_index=-100,
pad_value=0,
decoder_args=None,
is_export=False,
Expand All @@ -868,7 +867,6 @@ def __init__(
self.eos_token = 2
self.max_length = 512
self.pad_value = pad_value
self.ignore_index = ignore_index

self.net = transformer_decoder
self.max_seq_len = self.net.max_seq_len
Expand Down
4 changes: 2 additions & 2 deletions ppocr/postprocess/rec_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,9 +1216,9 @@ def add_special_char(self, dict_character):
class LaTeXOCRDecode(object):
"""Convert between latex-symbol and symbol-index"""

def __init__(self, fast_tokenizer_file=None, **kwargs):
def __init__(self, rec_char_dict_path, **kwargs):
super(LaTeXOCRDecode, self).__init__()
self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)

def post_process(self, s):
text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
Expand Down
10 changes: 6 additions & 4 deletions tools/infer/predict_rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def __init__(self, args, logger=None):
elif self.rec_algorithm == "LaTeXOCR":
postprocess_params = {
"name": "LaTeXOCRDecode",
"fast_tokenizer_file": args.rec_char_dict_path,
"rec_char_dict_path": args.rec_char_dict_path,
}
elif self.rec_algorithm == "ParseQ":
postprocess_params = {
Expand Down Expand Up @@ -515,8 +515,7 @@ def norm_img_latexocr(self, img):
max_dimensions = [672, 192]
mean = np.array(mean).reshape(shape).astype("float32")
std = np.array(std).reshape(shape).astype("float32")
img = (img.astype("float32") * scale - mean) / std
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

im_h, im_w = img.shape[:2]
if (
min_dimensions[0] <= im_w <= max_dimensions[0]
Expand All @@ -527,7 +526,10 @@ def norm_img_latexocr(self, img):
img = Image.fromarray(np.uint8(img))
img = self.minmax_size_(self.pad_(img), max_dimensions, min_dimensions)
img = np.array(img)
img = np.dstack((img, img, img))
im_h, im_w = img.shape[:2]
img = np.dstack([img, img, img])
img = (img.astype("float32") * scale - mean) / std
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
divide_h = math.ceil(im_h / 16) * 16
divide_w = math.ceil(im_w / 16) * 16
img = np.pad(
Expand Down

0 comments on commit ac5d6c1

Please sign in to comment.