[Feature] Add codespell pre-commit hook and fix typos (open-mmlab#520)

* add spellcheck hook
* fix typos
gaotongxiao · Oct 13, 2021 · 76590c5
1 parent 21eb82a
commit 76590c5
Show file tree

Hide file tree

Showing 22 changed files with 59 additions and 50 deletions.
diff --git a/.codespellrc b/.codespellrc
@@ -0,0 +1,5 @@
+[codespell]
+skip = *.ipynb
+count =
+quiet-level = 3
+ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -113,7 +113,7 @@ git fetch upstream
 
 # update the main branch of your fork
 git checkout main
-git rebase upsteam/main
+git rebase upstream/main
 git push origin main
 
 # update the <your_feature_branch> branch

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,6 +16,10 @@ repos:
     rev: v0.30.0
     hooks:
       - id: yapf
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.1.0
     hooks:

diff --git a/docs/install.md b/docs/install.md
@@ -12,7 +12,7 @@
 - [MMCV](https://mmcv.readthedocs.io/en/latest/#installation) >= 1.3.8
 - [MMDetection](https://mmdetection.readthedocs.io/en/latest/#installation) >= 2.14.0
 
-We have tested the following versions of OS and softwares:
+We have tested the following versions of OS and software:
 
 - OS: Ubuntu 16.04
 - CUDA: 10.1

diff --git a/docs/merge_docs.sh b/docs/merge_docs.sh
@@ -11,6 +11,6 @@ cat ../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1
 cat ../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
 cat ../configs/ner/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Named Entity Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >ner_models.md
 
-# replace speical symbols in demo.md
+# replace special symbols in demo.md
 cp ../demo/README.md demo.md
 sed -i 's/:heavy_check_mark:/Yes/g' demo.md && sed -i 's/:x:/No/g' demo.md
diff --git a/docs/training.md b/docs/training.md
@@ -92,7 +92,7 @@ Here we list some configs that are frequently used during training for quick ref
 ```python
 total_epochs = 1200
 data = dict(
-    # Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overrided in dataloader's config.
+    # Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overridden in dataloader's config.
     samples_per_gpu=8, # Batch size per GPU
     workers_per_gpu=4, # Number of workers to process data for each GPU
     train_dataloader=dict(samples_per_gpu=10, drop_last=True),   # Batch size = 10, workers_per_gpu = 4

diff --git a/docs_zh_CN/merge_docs.sh b/docs_zh_CN/merge_docs.sh
@@ -11,6 +11,6 @@ cat ../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1
 cat ../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
 cat ../configs/ner/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Named Entity Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >ner_models.md
 
-# replace speical symbols in demo.md
+# replace special symbols in demo.md
 cp ../demo/README.md demo.md
 sed -i 's/:heavy_check_mark:/Yes/g' demo.md && sed -i 's/:x:/No/g' demo.md
diff --git a/docs_zh_CN/training.md b/docs_zh_CN/training.md
@@ -92,7 +92,7 @@ Here we list some configs that are frequently used during training for quick ref
 ```python
 total_epochs = 1200
 data = dict(
-    # Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overrided in dataloader's config.
+    # Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overridden in dataloader's config.
     samples_per_gpu=8, # Batch size per GPU
     workers_per_gpu=4, # Number of workers to process data for each GPU
     train_dataloader=dict(samples_per_gpu=10, drop_last=True),   # Batch size = 10, workers_per_gpu = 4

diff --git a/mmocr/core/evaluation/hmean_ic13.py b/mmocr/core/evaluation/hmean_ic13.py
@@ -53,7 +53,7 @@ def eval_hmean_ic13(det_boxes,
                     one2one_score=1.,
                     one2many_score=0.8,
                     many2one_score=1.):
-    """Evalute hmean of text detection using the icdar2013 standard.
+    """Evaluate hmean of text detection using the icdar2013 standard.
 
     Args:
         det_boxes (list[list[list[float]]]): List of arrays of shape (n, 2k).

diff --git a/mmocr/core/evaluation/hmean_iou.py b/mmocr/core/evaluation/hmean_iou.py
@@ -10,7 +10,7 @@ def eval_hmean_iou(pred_boxes,
                    gt_ignored_boxes,
                    iou_thr=0.5,
                    precision_thr=0.5):
-    """Evalute hmean of text detection using IOU standard.
+    """Evaluate hmean of text detection using IOU standard.
 
     Args:
         pred_boxes (list[list[list[float]]]): Text boxes for an img list. Each

diff --git a/mmocr/core/evaluation/ner_metric.py b/mmocr/core/evaluation/ner_metric.py
@@ -5,7 +5,8 @@
 def gt_label2entity(gt_infos):
     """Get all entities from ground truth infos.
     Args:
-        gt_infos (list[dict]): Ground-truth infomation contains text and label.
+        gt_infos (list[dict]): Ground-truth information contains text and
+            label.
     Returns:
         gt_entities (list[list]): Original labeled entities in groundtruth.
                     [[category,start_position,end_position]]
@@ -94,7 +95,7 @@ def eval_ner_f1(results, gt_infos):
 
     Args:
         results (list): Predict results of entities.
-        gt_infos (list[dict]): Ground-truth infomation which contains
+        gt_infos (list[dict]): Ground-truth information which contains
                             text and label.
     Returns:
         class_info (dict): precision,recall, f1-score of total

diff --git a/mmocr/core/evaluation/utils.py b/mmocr/core/evaluation/utils.py
@@ -484,14 +484,14 @@ def select_top_boundary(boundaries_list, scores_list, score_thr):
 
     Args:
         boundaries_list (list[list[list[float]]]): List of boundaries.
-            The 1st, 2rd, and 3rd indices are for image, text and
+            The 1st, 2nd, and 3rd indices are for image, text and
             vertice, respectively.
         scores_list (list(list[float])): List of lists of scores.
         score_thr (float): The score threshold to filter out bboxes.
 
     Returns:
         selected_bboxes (list[list[list[float]]]): List of boundaries.
-            The 1st, 2rd, and 3rd indices are for image, text and vertice,
+            The 1st, 2nd, and 3rd indices are for image, text and vertice,
             respectively.
     """
     assert isinstance(boundaries_list, list)

diff --git a/mmocr/datasets/pipelines/ocr_seg_targets.py b/mmocr/datasets/pipelines/ocr_seg_targets.py
@@ -10,14 +10,14 @@
 
 @PIPELINES.register_module()
 class OCRSegTargets:
-    """Generate gt shrinked kernels for segmentation based OCR framework.
+    """Generate gt shrunk kernels for segmentation based OCR framework.
 
     Args:
         label_convertor (dict): Dictionary to construct label_convertor
             to convert char to index.
-        attn_shrink_ratio (float): The area shrinked ratio
+        attn_shrink_ratio (float): The area shrunk ratio
             between attention kernels and gt text masks.
-        seg_shrink_ratio (float): The area shrinked ratio
+        seg_shrink_ratio (float): The area shrunk ratio
             between segmentation kernels and gt text masks.
         box_type (str): Character box type, should be either
             'char_rects' or 'char_quads', with 'char_rects'
@@ -51,7 +51,7 @@ def shrink_char_quad(self, char_quad, shrink_ratio):
         Args:
             char_quad (list[float]): Char box with format
                 [x1, y1, x2, y2, x3, y3, x4, y4].
-            shrink_ratio (float): The area shrinked ratio
+            shrink_ratio (float): The area shrunk ratio
                 between gt kernels and gt text masks.
         """
         points = [[char_quad[0], char_quad[1]], [char_quad[2], char_quad[3]],
@@ -92,7 +92,7 @@ def shrink_char_rect(self, char_rect, shrink_ratio):
         Args:
             char_rect (list[float]): Char box with format
                 [x_min, y_min, x_max, y_max].
-            shrink_ratio (float): The area shrinked ratio
+            shrink_ratio (float): The area shrunk ratio
                 between gt kernels and gt text masks.
         """
         x_min, y_min, x_max, y_max = char_rect

diff --git a/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py b/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
@@ -118,23 +118,22 @@ def generate_kernels(self,
             pco = pyclipper.PyclipperOffset()
             pco.AddPath(instance, pyclipper.JT_ROUND,
                         pyclipper.ET_CLOSEDPOLYGON)
-            shrinked = np.array(pco.Execute(-distance))
+            shrunk = np.array(pco.Execute(-distance))
 
-            # check shrinked == [] or empty ndarray
-            if len(shrinked) == 0 or shrinked.size == 0:
+            # check shrunk == [] or empty ndarray
+            if len(shrunk) == 0 or shrunk.size == 0:
                 if ignore_tags is not None:
                     ignore_tags[text_ind] = True
                 continue
             try:
-                shrinked = np.array(shrinked[0]).reshape(-1, 2)
+                shrunk = np.array(shrunk[0]).reshape(-1, 2)
 
             except Exception as e:
-                print_log(f'{shrinked} with error {e}')
+                print_log(f'{shrunk} with error {e}')
                 if ignore_tags is not None:
                     ignore_tags[text_ind] = True
                 continue
-            cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)],
-                         text_ind + 1)
+            cv2.fillPoly(text_kernel, [shrunk.astype(np.int32)], text_ind + 1)
         return text_kernel, ignore_tags
 
     def generate_effective_mask(self, mask_size: tuple, polygons_ignore):

diff --git a/mmocr/datasets/pipelines/textdet_targets/dbnet_targets.py b/mmocr/datasets/pipelines/textdet_targets/dbnet_targets.py
@@ -11,13 +11,13 @@
 
 @PIPELINES.register_module()
 class DBNetTargets(BaseTextDetTargets):
-    """Generate gt shrinked text, gt threshold map, and their effective region
+    """Generate gt shrunk text, gt threshold map, and their effective region
     masks to learn DBNet: Real-time Scene Text Detection with Differentiable
     Binarization [https://arxiv.org/abs/1911.08947]. This was partially adapted
     from https://github.com/MhLiao/DB.
 
     Args:
-        shrink_ratio (float): The area shrinked ratio between text
+        shrink_ratio (float): The area shrunk ratio between text
             kernels and their text masks.
         thr_min (float): The minimum value of the threshold map.
         thr_max (float): The maximum value of the threshold map.

diff --git a/mmocr/models/textdet/losses/db_loss.py b/mmocr/models/textdet/losses/db_loss.py
@@ -115,9 +115,9 @@ def forward(self, preds, downsample_ratio, gt_shrink, gt_shrink_mask,
             downsample_ratio (float): The downsample ratio for the
                 ground truths.
             gt_shrink (list[BitmapMasks]): The mask list with each element
-                being the shrinked text mask for one img.
+                being the shrunk text mask for one img.
             gt_shrink_mask (list[BitmapMasks]): The effective mask list with
-                each element being the shrinked effective mask for one img.
+                each element being the shrunk effective mask for one img.
             gt_thr (list[BitmapMasks]): The mask list with each element
                 being the threshold text mask for one img.
             gt_thr_mask (list[BitmapMasks]): The effective mask list with

diff --git a/mmocr/models/textdet/losses/pan_loss.py b/mmocr/models/textdet/losses/pan_loss.py
@@ -96,7 +96,7 @@ def forward(self, preds, downsample_ratio, gt_kernels, gt_mask):
             gt_kernels (list[BitmapMasks]): The kernel list with each element
                 being the text kernel mask for one img.
             gt_mask (list[BitmapMasks]): The effective mask list
-                with each element being the effective mask fo one img.
+                with each element being the effective mask for one img.
             downsample_ratio (float): The downsample ratio between preds
                 and the input img.
 

diff --git a/mmocr/models/textdet/losses/pse_loss.py b/mmocr/models/textdet/losses/pse_loss.py
@@ -43,7 +43,7 @@ def forward(self, score_maps, downsample_ratio, gt_kernels, gt_mask):
             gt_kernels (list[BitmapMasks]): The kernel list with each element
                 being the text kernel mask for one img.
             gt_mask (list[BitmapMasks]): The effective mask list
-                with each element being the effective mask fo one img.
+                with each element being the effective mask for one img.
             downsample_ratio (float): The downsample ratio between score_maps
                 and the input img.
 

diff --git a/mmocr/models/textrecog/recognizer/base.py b/mmocr/models/textrecog/recognizer/base.py
@@ -98,7 +98,7 @@ def _parse_losses(self, losses):
 
         Args:
             losses (dict): Raw outputs of the network, which usually contain
-                losses and other necessary infomation.
+                losses and other necessary information.
 
         Returns:
             tuple[tensor, dict]: (loss, log_vars), loss is the loss tensor

diff --git a/mmocr/utils/ocr.py b/mmocr/utils/ocr.py
@@ -155,7 +155,7 @@ def parse_args():
             Path.cwd()))) and (args.det_config != ''
                                or args.recog_config != ''):
         warnings.warn(
-            'config_dir will be overrided by det-config or recog-config.',
+            'config_dir will be overridden by det-config or recog-config.',
             UserWarning)
     return args
 
@@ -642,7 +642,7 @@ def _args_processing(self, args):
             img_list = [args.img]
 
         # Read all image(s) in advance to reduce wasted time
-        # re-reading the images for vizualisation output
+        # re-reading the images for visualization output
         args.arrays = [mmcv.imread(x) for x in img_list]
 
         # Create a list of filenames (used for output images and result files)

diff --git a/tools/deployment/onnx2tensorrt.py b/tools/deployment/onnx2tensorrt.py
@@ -74,23 +74,23 @@ def _prepare_input_img(imgs, test_pipeline: Iterable[dict]):
     test_pipeline = replace_ImageToTensor(test_pipeline)
     test_pipeline = Compose(test_pipeline)
 
-    datas = []
+    data = []
     for img in imgs:
         # prepare data
         # add information into dict
-        data = dict(img_info=dict(filename=img), img_prefix=None)
+        datum = dict(img_info=dict(filename=img), img_prefix=None)
 
         # build the data pipeline
-        data = test_pipeline(data)
+        datum = test_pipeline(datum)
         # get tensor from list to stack for batch mode (text detection)
-        datas.append(data)
+        data.append(datum)
 
-    if isinstance(datas[0]['img'], list) and len(datas) > 1:
+    if isinstance(data[0]['img'], list) and len(data) > 1:
         raise Exception('aug test does not support '
                         f'inference with batch size '
-                        f'{len(datas)}')
+                        f'{len(data)}')
 
-    data = collate(datas, samples_per_gpu=len(imgs))
+    data = collate(data, samples_per_gpu=len(imgs))
 
     # process img_metas
     if isinstance(data['img_metas'], list):
@@ -280,7 +280,7 @@ def parse_args():
     assert args.workspace_size >= 0, 'Workspace size less than 0.'
     for max_value, min_value in zip(args.max_shape, args.min_shape):
         assert max_value >= min_value, \
-            'max_shape sould be larger than min shape'
+            'max_shape should be larger than min shape'
 
     input_config = {
         'min_shape': args.min_shape,

diff --git a/tools/deployment/pytorch2onnx.py b/tools/deployment/pytorch2onnx.py
@@ -95,27 +95,27 @@ def _prepare_data(cfg, imgs):
     cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
     test_pipeline = Compose(cfg.data.test.pipeline)
 
-    datas = []
+    data = []
     for img in imgs:
         # prepare data
         if is_ndarray:
             # directly add img
-            data = dict(img=img)
+            datum = dict(img=img)
         else:
             # add information into dict
-            data = dict(img_info=dict(filename=img), img_prefix=None)
+            datum = dict(img_info=dict(filename=img), img_prefix=None)
 
         # build the data pipeline
-        data = test_pipeline(data)
+        datum = test_pipeline(datum)
         # get tensor from list to stack for batch mode (text detection)
-        datas.append(data)
+        data.append(datum)
 
-    if isinstance(datas[0]['img'], list) and len(datas) > 1:
+    if isinstance(data[0]['img'], list) and len(data) > 1:
         raise Exception('aug test does not support '
                         f'inference with batch size '
-                        f'{len(datas)}')
+                        f'{len(data)}')
 
-    data = collate(datas, samples_per_gpu=len(imgs))
+    data = collate(data, samples_per_gpu=len(imgs))
 
     # process img_metas
     if isinstance(data['img_metas'], list):
@@ -346,7 +346,7 @@ def main():
     parser.add_argument(
         '--dynamic-export',
         action='store_true',
-        help='Whether dynamicly export onnx model.',
+        help='Whether dynamically export onnx model.',
         default=False)
     args = parser.parse_args()