Skip to content

Commit

Permalink
add ci for paddleocr test (#12062)
Browse files Browse the repository at this point in the history
* add ci for paddleocr test

* fix flake8 error

* fix paddlepaddle deps

* add dep

* fix

* move flake8 to pre-commit

* update ut

* fix bug

* fix bug set paddlepaddle==2.5

* fix bug

* fix bug

* fix bug

* update test

* remove lscpu
  • Loading branch information
GreatV authored May 22, 2024
1 parent 579d0c3 commit af87691
Show file tree
Hide file tree
Showing 14 changed files with 184 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
name: pre-commit
name: PaddleOCR Code Style Check

on:
pull_request:
push:
branches: ['main', 'release/*']

jobs:
pre-commit:
check-code-style:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/checkout@v4
with:
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: '3.10'
# Install Dependencies for Python
Expand Down
30 changes: 30 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CI workflow: installs the project and runs the pytest suite under tests/.
name: PaddleOCR PR Tests

# Triggers: any push, plus pull requests whose base branch is main or release/*.
on:
push:
pull_request:
branches: ["main", "release/*"]

# Restrict the workflow token to read-only access to repository contents.
permissions:
contents: read

# Single job on an Ubuntu runner using Python 3.10.
jobs:
test-pr:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: "3.10"
# Installs pytest, requirements.txt when present, a pinned paddlepaddle
# release plus requests, and finally the repository itself in editable mode.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install "paddlepaddle==2.5" requests
pip install -e .
- name: Test with pytest
run: |
pytest tests/
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,16 @@ repos:
hooks:
- id: black
files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$

# Flake8
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
hooks:
- id: flake8
args:
- --count
- --select=E9,F63,F7,F82
- --show-source
- --statistics
exclude: ^benchmark/|^test_tipc/

2 changes: 1 addition & 1 deletion benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __call__(self, data: dict):
return data
data["img"] = (
random_noise(data["img"], mode="gaussian", clip=True) * 255
).astype(im.dtype)
).astype(data["img"].dtype)
return data


Expand Down
2 changes: 1 addition & 1 deletion deploy/hubserving/kie_ser/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def serving_method(self, images, **kwargs):


if __name__ == "__main__":
ocr = OCRSystem()
ocr = KIESer()
ocr._initialize()
image_path = [
"./doc/imgs/11.jpg",
Expand Down
2 changes: 1 addition & 1 deletion deploy/hubserving/kie_ser_re/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def serving_method(self, images, **kwargs):


if __name__ == "__main__":
ocr = OCRSystem()
ocr = KIESerRE()
ocr._initialize()
image_path = [
"./doc/imgs/11.jpg",
Expand Down
10 changes: 5 additions & 5 deletions ppocr/data/imaug/label_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,11 +841,11 @@ def __call__(self, data):
return data

def xyxyxyxy2xywh(self, boxes):
    """Convert four-point boxes to axis-aligned ``(x, y, w, h)`` boxes.

    Args:
        boxes: 2-D array-like with one box per row and interleaved
            coordinates ``x, y, x, y, ...`` (even columns are x, odd
            columns are y); expected to be ``(N, 8)`` per the method name.

    Returns:
        ``np.ndarray`` of shape ``(N, 4)`` holding, for every row, the
        minimum x, minimum y, width and height of that row's points.
    """
    boxes = np.asarray(boxes)
    xs = boxes[:, 0::2]
    ys = boxes[:, 1::2]

    new_bboxes = np.zeros([len(boxes), 4])
    # Reduce along axis=1 so each box gets its own extent; an axis-less
    # min()/max() would collapse all boxes into one global bounding box.
    new_bboxes[:, 0] = xs.min(axis=1)  # x1
    new_bboxes[:, 1] = ys.min(axis=1)  # y1
    new_bboxes[:, 2] = xs.max(axis=1) - new_bboxes[:, 0]  # w
    new_bboxes[:, 3] = ys.max(axis=1) - new_bboxes[:, 1]  # h
    return new_bboxes

def xyxy2xywh(self, bboxes):
Expand Down
4 changes: 3 additions & 1 deletion ppocr/losses/distillation_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,9 @@ def forward(self, predicts, batch):
loss = super().forward(out1, out2, ctc_label)
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}".format(self.name, model_name, idx)] = loss[key]
loss_dict[
"{}_{}_{}".format(self.name, self.model_name_pairs, idx)
] = loss[key]
else:
loss_dict["{}_{}".format(self.name, idx)] = loss
return loss_dict
2 changes: 1 addition & 1 deletion ppocr/metrics/vqa_token_re_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import paddle

__all__ = ["KIEMetric"]
__all__ = ["VQAReTokenMetric"]


class VQAReTokenMetric(object):
Expand Down
2 changes: 1 addition & 1 deletion ppocr/metrics/vqa_token_ser_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import paddle

__all__ = ["KIEMetric"]
__all__ = ["VQASerTokenMetric"]


class VQASerTokenMetric(object):
Expand Down
2 changes: 1 addition & 1 deletion ppocr/modeling/backbones/rec_efficientb3_pren.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import paddle.nn as nn
import paddle.nn.functional as F

__all__ = ["EfficientNetb3"]
__all__ = ["EfficientNetb3_PREN"]

GlobalParams = collections.namedtuple(
"GlobalParams",
Expand Down
2 changes: 1 addition & 1 deletion ppocr/modeling/heads/rec_aster_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def sample(self, x):
# Decoder
state = paddle.zeros([1, batch_size, self.sDim])

predicted_ids, predicted_scores = [], []
predicted_ids, predicted_scores, predicted = [], [], None
for i in range(self.max_len_labels):
if i == 0:
y_prev = paddle.full(shape=[batch_size], fill_value=self.num_classes)
Expand Down
5 changes: 4 additions & 1 deletion ppocr/utils/loggers/wandb_logger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import os
from .base_logger import BaseLogger
from ppocr.utils.logging import get_logger

logger = get_logger()


class WandbLogger(BaseLogger):
Expand All @@ -11,7 +14,7 @@ def __init__(
entity=None,
save_dir=None,
config=None,
**kwargs
**kwargs,
):
try:
import wandb
Expand Down
116 changes: 116 additions & 0 deletions tests/test_paddleocr_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import Any

import pytest
from paddleocr import PaddleOCR, PPStructure


# Test image paths
# Sample images fed to the parametrized tests in this module. The paths are
# relative, so they resolve against the directory pytest is launched from.
IMAGE_PATHS_OCR = ["./doc/imgs_en/254.jpg", "./doc/imgs_en/img_10.jpg"]
IMAGE_PATHS_STRUCTURE = [
"./ppstructure/docs/table/layout.jpg",
"./ppstructure/docs/table/1.png",
]


@pytest.fixture(scope="module", params=["en", "ch"])
def ocr_engine(request: Any) -> PaddleOCR:
    """Provide a PaddleOCR engine for each supported test language.

    The fixture is module-scoped so that one engine per language is built
    and shared by every test that requests it, instead of constructing a
    fresh engine for each individual test case.

    Args:
        request: pytest fixture request; ``request.param`` is the language.

    Returns:
        An instance of PaddleOCR created with ``lang=request.param``.
    """
    return PaddleOCR(lang=request.param)


def test_ocr_initialization(ocr_engine: PaddleOCR) -> None:
    """Check that the ``ocr_engine`` fixture produced an object.

    Args:
        ocr_engine: Engine supplied by the ``ocr_engine`` fixture.
    """
    engine_missing = ocr_engine is None
    assert not engine_missing


@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
def test_ocr_function(ocr_engine: PaddleOCR, image_path: str) -> None:
    """Run ``ocr`` with its default arguments on each sample image.

    Args:
        ocr_engine: Engine supplied by the ``ocr_engine`` fixture.
        image_path: Image file handed to ``ocr``.
    """
    ocr_output = ocr_engine.ocr(image_path)

    # The call must yield a value, and that value must be a list.
    assert ocr_output is not None
    assert isinstance(ocr_output, list)


@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
def test_ocr_det_only(ocr_engine: PaddleOCR, image_path: str) -> None:
    """Run ``ocr`` with detection enabled and recognition disabled.

    Args:
        ocr_engine: Engine supplied by the ``ocr_engine`` fixture.
        image_path: Image file handed to ``ocr``.
    """
    detection_output = ocr_engine.ocr(image_path, rec=False, det=True)

    # The call must yield a value, and that value must be a list.
    assert detection_output is not None
    assert isinstance(detection_output, list)


@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
def test_ocr_rec_only(ocr_engine: PaddleOCR, image_path: str) -> None:
    """Run ``ocr`` with recognition enabled and detection disabled.

    Args:
        ocr_engine: Engine supplied by the ``ocr_engine`` fixture.
        image_path: Image file handed to ``ocr``.
    """
    recognition_output = ocr_engine.ocr(image_path, rec=True, det=False)

    # The call must yield a value, and that value must be a list.
    assert recognition_output is not None
    assert isinstance(recognition_output, list)


@pytest.fixture(scope="module", params=["en", "ch"])
def structure_engine(request: Any) -> PPStructure:
    """Provide a PPStructure engine for each supported test language.

    The fixture is module-scoped so that one engine per language is built
    and shared by every test that requests it, instead of constructing a
    fresh engine for each individual test case.

    Args:
        request: pytest fixture request; ``request.param`` is the language.

    Returns:
        An instance of PPStructure created with ``lang=request.param``.
    """
    return PPStructure(lang=request.param)


def test_structure_initialization(structure_engine: PPStructure) -> None:
    """Check that the ``structure_engine`` fixture produced an object.

    Args:
        structure_engine: Engine supplied by the ``structure_engine`` fixture.
    """
    engine_missing = structure_engine is None
    assert not engine_missing


@pytest.mark.parametrize("image_path", IMAGE_PATHS_STRUCTURE)
def test_structure_function(structure_engine: PPStructure, image_path: str) -> None:
    """Call the structure engine on each sample image.

    Args:
        structure_engine: Engine supplied by the ``structure_engine`` fixture.
        image_path: Image file passed to the engine call.
    """
    analysis_output = structure_engine(image_path)

    # The call must yield a value, and that value must be a list.
    assert analysis_output is not None
    assert isinstance(analysis_output, list)

0 comments on commit af87691

Please sign in to comment.