
[WIP] New pipe #697

Open · wants to merge 32 commits into base: main

Commits (32)
09e9aee
Fix problems of psenet-ctw1500 training
horcham Jan 31, 2024
828fbb7
Fix problems of psenet-ctw1500 training
horcham Jan 31, 2024
cbaea06
Fix problems of psenet-ctw1500 training
horcham Jan 31, 2024
e6b64e2
pull upstream
horcham Feb 5, 2024
e074f5f
Merge branch 'main' of https://github.com/mindspore-lab/mindocr
horcham Mar 6, 2024
e38bacb
for export
horcham Mar 18, 2024
77d8138
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
8c1938b
fix large npu memory cost
horcham Mar 18, 2024
d5e15db
Merge pull request #2 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
b20f0fa
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
7d10934
Merge pull request #3 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
56dab20
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
9ce69b8
Merge branch 'infer' into dev-offlineinfer
Bourn3z Mar 18, 2024
66850a4
Merge pull request #4 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
7275209
Merge pull request #5 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
855bade
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
4626519
Merge branch 'infer' into dev-offlineinfer
Bourn3z Mar 18, 2024
66c8509
Merge pull request #7 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
2750581
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
04976fd
Merge branch 'infer' into dev-offlineinfer
Bourn3z Mar 18, 2024
f8fc62f
Merge pull request #8 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
d654c13
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
f5b05fa
Merge pull request #9 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
258da17
Add the function of concatenating to crops after detection.
Bourn3z Mar 18, 2024
2621f14
Merge branch 'infer' into dev-offlineinfer
Bourn3z Mar 18, 2024
83dfa9e
Merge pull request #10 from Bourn3z/dev-offlineinfer
Bourn3z Mar 18, 2024
7971ee0
Merge branch 'main' of https://github.com/mindspore-lab/mindocr
horcham Apr 22, 2024
07436c2
add new pipeline
horcham Apr 23, 2024
4e6ec4e
add layout
horcham Apr 28, 2024
fde079c
add layout
horcham Apr 28, 2024
d1c2610
add layout
horcham Apr 28, 2024
963d34c
Merge branch 'infer' of https://github.com/horcham/mindocr into new_pipe
horcham Apr 28, 2024
44 changes: 44 additions & 0 deletions configs/cls/mobilenetv3/cls_mv3.yaml
@@ -142,3 +142,47 @@ eval:
drop_remainder: False
max_rowsize: 12
num_workers: 8

predict:
backend: MindSpore
  device_target: Ascend
device_id: 1
max_device_memory: 8GB
amp_level: O0
mode: 0
ckpt_load_path: /root/.mindspore/models/cls_mobilenetv3-92db9c58.ckpt
dataset_sink_mode: False
dataset:
type: RecDataset
dataset_root: dir/to/dataset
data_dir: all_images
label_file: val_cls_gt.txt
sample_ratio: 1.0
shuffle: False
transform_pipeline:
- DecodeImage:
img_mode: BGR
to_float32: False
- Rotate90IfVertical:
threshold: 2.0
direction: counterclockwise
- RecResizeImg:
image_shape: [48, 192] # H, W
padding: False # aspect ratio will be preserved if true.
- NormalizeImage:
bgr_to_rgb: True
is_hwc: True
mean : [127.0, 127.0, 127.0]
std : [127.0, 127.0, 127.0]
- ToCHWImage:
    # order of the dataloader output list, matching the network input and the labels for the loss function, plus optional data for debugging/visualization
    output_columns: ['image', 'label'] # TODO: return the text string padded to a fixed length, and a scalar indicating the length
net_input_column_index: [0] # input indices for network forward func in output_columns
label_column_index: [1] # input indices marked as label

loader:
shuffle: False
batch_size: 8
drop_remainder: False
max_rowsize: 12
num_workers: 8
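Note: the new predict section mirrors the structure of the existing eval section and additionally pins the inference backend, device and memory settings. A minimal sketch of how such a section could be consumed (the loading code below is an illustration, not the PR's actual pipeline code):

import yaml
import mindspore as ms

# Hedged sketch: read the predict section added above and apply its device settings.
with open("configs/cls/mobilenetv3/cls_mv3.yaml", "r") as f:
    cfg = yaml.safe_load(f)

pred = cfg["predict"]
ms.set_context(
    mode=pred["mode"],                        # 0 = GRAPH_MODE, 1 = PYNATIVE_MODE
    device_target=pred["device_target"],
    device_id=pred["device_id"],
    max_device_memory=pred["max_device_memory"],
)
# ckpt_load_path, amp_level and predict.dataset.transform_pipeline would then feed the
# model builder and create_transforms, just as the eval section already does.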
8 changes: 7 additions & 1 deletion configs/det/dbnet/db_r50_icdar15.yaml
@@ -157,7 +157,13 @@ eval:
num_workers: 2

predict:
ckpt_load_path: tmp_det/best.ckpt
backend: MindSpore
  device_target: Ascend
device_id: 0
max_device_memory: 8GB
amp_level: O0
mode: 0
ckpt_load_path: /root/.mindspore/models/dbnet_resnet50-c3a4aa24.ckpt
output_save_dir: ./output
dataset_sink_mode: False
dataset:
34 changes: 34 additions & 0 deletions configs/layout/yolov8/yolov8n.yaml
@@ -151,3 +151,37 @@ eval:
drop_remainder: False
max_rowsize: 12
num_workers: 8

predict:
backend: MindSpore
  device_target: Ascend
device_id: 3
max_device_memory: 8GB
amp_level: O0
mode: 0
ckpt_load_path: /root/.mindspore/models/yolov8n-4b9e8004.ckpt
dataset_sink_mode: False
dataset:
type: PublayNetDataset
dataset_path: publaynet/val.txt
annotations_path: *annotations_path
img_size: 800
transform_pipeline:
- func_name: letterbox
scaleup: False
- func_name: image_norm
scale: 255.
- func_name: image_transpose
bgr2rgb: True
hwc2chw: True
batch_size: *refine_batch_size
stride: 64
output_columns: ['image', 'labels', 'image_ids', 'hw_ori', 'hw_scale', 'pad']
net_input_column_index: [ 0 ] # input indices for network forward func in output_columns
    meta_data_column_index: [ 2, 3, 4, 5 ] # indices of the meta data columns (image_ids, hw_ori, hw_scale, pad) in output_columns
loader:
shuffle: False
batch_size: *refine_batch_size
drop_remainder: False
max_rowsize: 12
num_workers: 8
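The *annotations_path and *refine_batch_size entries are YAML aliases; they resolve to anchors that must be defined earlier in the same file. The two index lists describe how each batch, ordered like output_columns, is split between the network and the evaluation/meta path. A hedged sketch of that convention (the helper below is illustrative, not code from this PR):

output_columns = ["image", "labels", "image_ids", "hw_ori", "hw_scale", "pad"]
net_input_column_index = [0]           # the network forward only receives "image"
meta_data_column_index = [2, 3, 4, 5]  # image_ids, hw_ori, hw_scale, pad travel as meta data

def split_batch(batch):
    # batch is a tuple/list ordered like output_columns
    net_inputs = [batch[i] for i in net_input_column_index]
    meta_data = {output_columns[i]: batch[i] for i in meta_data_column_index}
    return net_inputs, meta_data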
8 changes: 7 additions & 1 deletion configs/rec/crnn/crnn_resnet34.yaml
@@ -150,7 +150,13 @@ eval:
num_workers: 8

predict:
ckpt_load_path: ./tmp_rec/best.ckpt
backend: MindSpore
  device_target: Ascend
device_id: 2
max_device_memory: 8GB
amp_level: O3
mode: 0
ckpt_load_path: /root/.mindspore/models/crnn_resnet34-83f37f07.ckpt
vis_font_path: tools/utils/simfang.ttf
dataset_sink_mode: False
dataset:
4 changes: 2 additions & 2 deletions deploy/py_infer/src/core/model/model.py
@@ -106,8 +106,8 @@ def warmup(self):
height, width = hw_list[0]
warmup_shape = [(*other_shape, height, width)] # Only single input

dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
self.model.infer(dummy_tensor)
# dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
# self.model.infer(dummy_tensor)

def __del__(self):
if hasattr(self, "model") and self.model:
1 change: 1 addition & 0 deletions deploy/py_infer/src/data_process/postprocess/builder.py
@@ -44,6 +44,7 @@ def get_device_status():
def _get_status():
nonlocal status
try:
ms.set_context(max_device_memory="0.01GB")
status = ms.Tensor([0])[0:].asnumpy()[0]
except RuntimeError:
status = 1
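The added ms.set_context(max_device_memory="0.01GB") caps the memory pool that this availability probe claims before it touches the device, in line with the "fix large npu memory cost" commit. A standalone sketch of the same pattern (the function name and values are illustrative, not code from this PR):

import mindspore as ms

def probe_device() -> int:
    # Return 0 if the device can be initialized, 1 otherwise.
    try:
        ms.set_context(max_device_memory="0.01GB")  # keep the probe from reserving a large pool
        _ = ms.Tensor([0])[0:].asnumpy()[0]         # triggers device initialization
        return 0
    except RuntimeError:
        return 1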
3 changes: 3 additions & 0 deletions deploy/py_infer/src/infer_args.py
@@ -119,6 +119,9 @@ def get_args():
"--show_log", type=str2bool, default=False, required=False, help="Whether show log when inferring."
)
parser.add_argument("--save_log_dir", type=str, required=False, help="Log saving dir.")
parser.add_argument(
"--is_concat", type=str2bool, default=False, help="Whether to concatenate crops after the detection."
)

args = parser.parse_args()
setup_logger(args)
28 changes: 28 additions & 0 deletions deploy/py_infer/src/parallel/module/detection/det_post_node.py
@@ -1,3 +1,4 @@
import cv2
import numpy as np

from ....data_process.utils import cv_utils
@@ -10,19 +11,44 @@ def __init__(self, args, msg_queue):
super(DetPostNode, self).__init__(args, msg_queue)
self.text_detector = None
self.task_type = self.args.task_type
self.is_concat = self.args.is_concat

def init_self_args(self):
self.text_detector = TextDetector(self.args)
self.text_detector.init(preprocess=False, model=False, postprocess=True)
super().init_self_args()

def concat_crops(self, crops: list):
"""
Concatenates the list of cropped images horizontally after resizing them to have the same height.

Args:
crops (list): A list of cropped images represented as numpy arrays.

Returns:
numpy.ndarray: A horizontally concatenated image array.
"""
max_height = max(crop.shape[0] for crop in crops)
resized_crops = []
for crop in crops:
h, w, c = crop.shape
new_h = max_height
new_w = int((w / h) * new_h)

resized_img = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
resized_crops.append(resized_img)
crops_concated = np.concatenate(resized_crops, axis=1)
return crops_concated

def process(self, input_data):
if input_data.skip:
self.send_to_next_module(input_data)
return

data = input_data.data
boxes = self.text_detector.postprocess(data["pred"], data["shape_list"])
if self.is_concat:
boxes = sorted(boxes, key=lambda points: (points[0][1], points[0][0]))

infer_res_list = []
for box in boxes:
@@ -39,6 +65,8 @@ def process(self, input_data):
for box in infer_res_list:
sub_image = cv_utils.crop_box_from_image(image, np.array(box))
sub_image_list.append(sub_image)
if self.is_concat:
sub_image_list = len(sub_image_list) * [self.concat_crops(sub_image_list)]
input_data.sub_image_list = sub_image_list

input_data.data = None
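With --is_concat enabled, the detected boxes are sorted, every crop from one image is resized to a common height and concatenated into a single wide image, and that merged image fills every slot of sub_image_list, so the downstream recognizer sees one long text line per image. A standalone check of the resize-and-concatenate logic (shapes are illustrative):

import cv2
import numpy as np

def concat_crops(crops):
    # same resize-to-max-height-then-hstack logic as DetPostNode.concat_crops above
    max_height = max(crop.shape[0] for crop in crops)
    resized = []
    for crop in crops:
        h, w, _ = crop.shape
        new_w = int((w / h) * max_height)
        resized.append(cv2.resize(crop, (new_w, max_height), interpolation=cv2.INTER_LINEAR))
    return np.concatenate(resized, axis=1)

crops = [np.zeros((32, 100, 3), np.uint8), np.zeros((48, 80, 3), np.uint8)]
print(concat_crops(crops).shape)  # (48, 230, 3): both crops scaled to height 48, widths 150 + 80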
90 changes: 90 additions & 0 deletions mindocr/data/transforms/layout_transform.py
@@ -0,0 +1,90 @@
import cv2
import numpy as np

import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../../../")))

from mindocr.data.layout_dataset import xyxy2xywh

def letterbox(scaleup):
def func(data):
image = data["image"]
hw_ori = data["raw_img_shape"]
new_shape = data["target_size"]
color = (114, 114, 114)
# Resize and pad image while meeting stride-multiple constraints
shape = image.shape[:2] # current shape [height, width]
h, w = shape[:]
# h0, w0 = hw_ori
h0, w0 = new_shape
# hw_scale = np.array([h / h0, w / w0])
hw_scale = np.array([h0 / h, w0 / w])
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)

# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)

# Compute padding
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding

dw, dh = dw / 2, dh / 2 # divide padding into 2 sides
hw_pad = np.array([dh, dw])

if shape[::-1] != new_unpad: # resize
image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border

data["image"] = image
data["image_ids"] = 0
data["hw_ori"] = hw_ori
data["hw_scale"] = hw_scale
data["pad"] = hw_pad
return data

return func


def image_norm(scale=255.0):
def func(data):
image = data["image"]
image = image.astype(np.float32, copy=False)
image /= scale
data["image"] = image
return data

return func


def image_transpose(bgr2rgb=True, hwc2chw=True):
def func(data):
image = data["image"]
if bgr2rgb:
image = image[:, :, ::-1]
if hwc2chw:
image = image.transpose(2, 0, 1)
data["image"] = image
return data

return func

def label_norm(labels, xyxy2xywh_=True):
def func(data):
if len(labels) == 0:
return data, labels

if xyxy2xywh_:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh

labels[:, [2, 4]] /= data.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= data.shape[1] # normalized width 0-1

return data, labels
return func
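A minimal usage sketch of the factories above, chained the same way the predict.dataset.transform_pipeline entries in yolov8n.yaml list them (the input keys "raw_img_shape" and "target_size" are taken from the function bodies; how the real dataloader injects them is an assumption):

import numpy as np

img = np.random.randint(0, 255, (600, 900, 3), dtype=np.uint8)  # dummy BGR page image
data = {"image": img, "raw_img_shape": img.shape[:2], "target_size": (800, 800)}

pipeline = [letterbox(scaleup=False), image_norm(scale=255.0), image_transpose(bgr2rgb=True, hwc2chw=True)]
for fn in pipeline:
    data = fn(data)

print(data["image"].shape)            # (3, 800, 800): letterboxed, normalized, HWC -> CHW
print(data["hw_scale"], data["pad"])  # scale factors and padding recorded for postprocessing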
1 change: 1 addition & 0 deletions mindocr/data/transforms/transforms_factory.py
@@ -15,6 +15,7 @@
from .rec_transforms import *
from .svtr_transform import *
from .table_transform import *
from .layout_transform import *

__all__ = ["create_transforms", "run_transforms", "transforms_dbnet_icdar15"]
_logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions mindocr/infer/classification/__init__.py
@@ -0,0 +1,3 @@
from .cls_infer_node import ClsInferNode
from .cls_post_node import ClsPostNode
from .cls_pre_node import ClsPreNode
62 changes: 62 additions & 0 deletions mindocr/infer/classification/classification.py
@@ -0,0 +1,62 @@
import logging
import os
import time
import sys
import numpy as np
import yaml
from addict import Dict
from typing import List

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../../")))

from tools.infer.text.utils import get_ckpt_file
from mindocr.data.transforms import create_transforms, run_transforms
from mindocr.postprocess import build_postprocess
from mindocr.infer.utils.model import MSModel, LiteModel


algo_to_model_name = {
"MV3": "cls_mobilenet_v3_small_100_model",
}
logger = logging.getLogger("mindocr")

class ClsPreprocess(object):
def __init__(self, args):
self.args = args
with open(args.cls_model_name_or_config, "r") as f:
self.yaml_cfg = Dict(yaml.safe_load(f))
self.transforms = create_transforms(self.yaml_cfg.predict.dataset.transform_pipeline)

def __call__(self, img):
data = {"image": img}
data = run_transforms(data, self.transforms[1:])
return data


class ClsModelMS(MSModel):
def __init__(self, args):
self.args = args
self.model_name = algo_to_model_name[args.cls_algorithm]
self.config_path = args.cls_config_path
self._init_model(self.model_name, self.config_path)


class ClsModelLite(LiteModel):
def __init__(self, args):
self.args = args
self.model_name = algo_to_model_name[args.cls_algorithm]
self.config_path = args.cls_config_path
self._init_model(self.model_name, self.config_path)

INFER_CLS_MAP = {"MindSporeLite": ClsModelLite, "MindSpore": ClsModelMS}

class ClsPostprocess(object):
def __init__(self, args):
self.args = args
with open(args.cls_model_name_or_config, "r") as f:
self.yaml_cfg = Dict(yaml.safe_load(f))
self.postprocessor = build_postprocess(self.yaml_cfg.postprocess)

def __call__(self, pred):
return self.postprocessor(pred)
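A hedged wiring sketch of these pieces (the args fields come from the code above; the field values and the final inference call are assumptions about the surrounding pipeline, not code from this PR):

from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(
    cls_model_name_or_config="configs/cls/mobilenetv3/cls_mv3.yaml",  # yaml with predict + postprocess sections
    cls_algorithm="MV3",                                              # key into algo_to_model_name
    cls_config_path="configs/cls/mobilenetv3/cls_mv3.yaml",
)

preprocess = ClsPreprocess(args)
model = INFER_CLS_MAP["MindSpore"](args)   # ClsModelMS; "MindSporeLite" selects ClsModelLite
postprocess = ClsPostprocess(args)

crop = np.zeros((48, 192, 3), np.uint8)    # one text crop in HWC/BGR layout
batch = preprocess(crop)["image"][None]    # transforms yield CHW; add a batch dimension
# pred = model.model.infer(batch)          # exact inference call depends on MSModel/LiteModel
# direction = postprocess(pred)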