Skip to content

Commit

Permalink
[Feature] Add codespell pre-commit hook and fix typos (open-mmlab#520)
Browse files (browse the repository at this point in the history)
* add spellcheck hook

* fix typos
  • Loading branch information
gaotongxiao authored Oct 13, 2021
1 parent 21eb82a commit 76590c5
Show file tree
Hide file tree
Showing 22 changed files with 59 additions and 50 deletions.
5 changes: 5 additions & 0 deletions .codespellrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[codespell]
skip = *.ipynb
count =
quiet-level = 3
ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ git fetch upstream
# update the main branch of your fork
git checkout main
git rebase upsteam/main
git rebase upstream/main
git push origin main
# update the <your_feature_branch> branch
Expand Down
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ repos:
rev: v0.30.0
hooks:
- id: yapf
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.1.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
- [MMCV](https://mmcv.readthedocs.io/en/latest/#installation) >= 1.3.8
- [MMDetection](https://mmdetection.readthedocs.io/en/latest/#installation) >= 2.14.0

We have tested the following versions of OS and softwares:
We have tested the following versions of OS and software:

- OS: Ubuntu 16.04
- CUDA: 10.1
Expand Down
2 changes: 1 addition & 1 deletion docs/merge_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ cat ../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1
cat ../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
cat ../configs/ner/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Named Entity Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >ner_models.md

# replace speical symbols in demo.md
# replace special symbols in demo.md
cp ../demo/README.md demo.md
sed -i 's/:heavy_check_mark:/Yes/g' demo.md && sed -i 's/:x:/No/g' demo.md
2 changes: 1 addition & 1 deletion docs/training.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Here we list some configs that are frequently used during training for quick ref
```python
total_epochs = 1200
data = dict(
# Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overrided in dataloader's config.
# Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overridden in dataloader's config.
samples_per_gpu=8, # Batch size per GPU
workers_per_gpu=4, # Number of workers to process data for each GPU
train_dataloader=dict(samples_per_gpu=10, drop_last=True), # Batch size = 10, workers_per_gpu = 4
Expand Down
2 changes: 1 addition & 1 deletion docs_zh_CN/merge_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ cat ../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1
cat ../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
cat ../configs/ner/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Named Entity Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >ner_models.md

# replace speical symbols in demo.md
# replace special symbols in demo.md
cp ../demo/README.md demo.md
sed -i 's/:heavy_check_mark:/Yes/g' demo.md && sed -i 's/:x:/No/g' demo.md
2 changes: 1 addition & 1 deletion docs_zh_CN/training.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Here we list some configs that are frequently used during training for quick ref
```python
total_epochs = 1200
data = dict(
# Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overrided in dataloader's config.
# Note: User can configure general settings of train, val and test dataloader by specifying them here. However, their values can be overridden in dataloader's config.
samples_per_gpu=8, # Batch size per GPU
workers_per_gpu=4, # Number of workers to process data for each GPU
train_dataloader=dict(samples_per_gpu=10, drop_last=True), # Batch size = 10, workers_per_gpu = 4
Expand Down
2 changes: 1 addition & 1 deletion mmocr/core/evaluation/hmean_ic13.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def eval_hmean_ic13(det_boxes,
one2one_score=1.,
one2many_score=0.8,
many2one_score=1.):
"""Evalute hmean of text detection using the icdar2013 standard.
"""Evaluate hmean of text detection using the icdar2013 standard.
Args:
det_boxes (list[list[list[float]]]): List of arrays of shape (n, 2k).
Expand Down
2 changes: 1 addition & 1 deletion mmocr/core/evaluation/hmean_iou.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def eval_hmean_iou(pred_boxes,
gt_ignored_boxes,
iou_thr=0.5,
precision_thr=0.5):
"""Evalute hmean of text detection using IOU standard.
"""Evaluate hmean of text detection using IOU standard.
Args:
pred_boxes (list[list[list[float]]]): Text boxes for an img list. Each
Expand Down
5 changes: 3 additions & 2 deletions mmocr/core/evaluation/ner_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
def gt_label2entity(gt_infos):
"""Get all entities from ground truth infos.
Args:
gt_infos (list[dict]): Ground-truth infomation contains text and label.
gt_infos (list[dict]): Ground-truth information contains text and
label.
Returns:
gt_entities (list[list]): Original labeled entities in groundtruth.
[[category,start_position,end_position]]
Expand Down Expand Up @@ -94,7 +95,7 @@ def eval_ner_f1(results, gt_infos):
Args:
results (list): Predict results of entities.
gt_infos (list[dict]): Ground-truth infomation which contains
gt_infos (list[dict]): Ground-truth information which contains
text and label.
Returns:
class_info (dict): precision,recall, f1-score of total
Expand Down
4 changes: 2 additions & 2 deletions mmocr/core/evaluation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,14 +484,14 @@ def select_top_boundary(boundaries_list, scores_list, score_thr):
Args:
boundaries_list (list[list[list[float]]]): List of boundaries.
The 1st, 2rd, and 3rd indices are for image, text and
The 1st, 2nd, and 3rd indices are for image, text and
vertex, respectively.
scores_list (list(list[float])): List of lists of scores.
score_thr (float): The score threshold to filter out bboxes.
Returns:
selected_bboxes (list[list[list[float]]]): List of boundaries.
The 1st, 2rd, and 3rd indices are for image, text and vertice,
The 1st, 2nd, and 3rd indices are for image, text and vertex,
respectively.
"""
assert isinstance(boundaries_list, list)
Expand Down
10 changes: 5 additions & 5 deletions mmocr/datasets/pipelines/ocr_seg_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

@PIPELINES.register_module()
class OCRSegTargets:
"""Generate gt shrinked kernels for segmentation based OCR framework.
"""Generate gt shrunk kernels for segmentation based OCR framework.
Args:
label_convertor (dict): Dictionary to construct label_convertor
to convert char to index.
attn_shrink_ratio (float): The area shrinked ratio
attn_shrink_ratio (float): The area shrunk ratio
between attention kernels and gt text masks.
seg_shrink_ratio (float): The area shrinked ratio
seg_shrink_ratio (float): The area shrunk ratio
between segmentation kernels and gt text masks.
box_type (str): Character box type, should be either
'char_rects' or 'char_quads', with 'char_rects'
Expand Down Expand Up @@ -51,7 +51,7 @@ def shrink_char_quad(self, char_quad, shrink_ratio):
Args:
char_quad (list[float]): Char box with format
[x1, y1, x2, y2, x3, y3, x4, y4].
shrink_ratio (float): The area shrinked ratio
shrink_ratio (float): The area shrunk ratio
between gt kernels and gt text masks.
"""
points = [[char_quad[0], char_quad[1]], [char_quad[2], char_quad[3]],
Expand Down Expand Up @@ -92,7 +92,7 @@ def shrink_char_rect(self, char_rect, shrink_ratio):
Args:
char_rect (list[float]): Char box with format
[x_min, y_min, x_max, y_max].
shrink_ratio (float): The area shrinked ratio
shrink_ratio (float): The area shrunk ratio
between gt kernels and gt text masks.
"""
x_min, y_min, x_max, y_max = char_rect
Expand Down
13 changes: 6 additions & 7 deletions mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,23 +118,22 @@ def generate_kernels(self,
pco = pyclipper.PyclipperOffset()
pco.AddPath(instance, pyclipper.JT_ROUND,
pyclipper.ET_CLOSEDPOLYGON)
shrinked = np.array(pco.Execute(-distance))
shrunk = np.array(pco.Execute(-distance))

# check shrinked == [] or empty ndarray
if len(shrinked) == 0 or shrinked.size == 0:
# check shrunk == [] or empty ndarray
if len(shrunk) == 0 or shrunk.size == 0:
if ignore_tags is not None:
ignore_tags[text_ind] = True
continue
try:
shrinked = np.array(shrinked[0]).reshape(-1, 2)
shrunk = np.array(shrunk[0]).reshape(-1, 2)

except Exception as e:
print_log(f'{shrinked} with error {e}')
print_log(f'{shrunk} with error {e}')
if ignore_tags is not None:
ignore_tags[text_ind] = True
continue
cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)],
text_ind + 1)
cv2.fillPoly(text_kernel, [shrunk.astype(np.int32)], text_ind + 1)
return text_kernel, ignore_tags

def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
Expand Down
4 changes: 2 additions & 2 deletions mmocr/datasets/pipelines/textdet_targets/dbnet_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

@PIPELINES.register_module()
class DBNetTargets(BaseTextDetTargets):
"""Generate gt shrinked text, gt threshold map, and their effective region
"""Generate gt shrunk text, gt threshold map, and their effective region
masks to learn DBNet: Real-time Scene Text Detection with Differentiable
Binarization [https://arxiv.org/abs/1911.08947]. This was partially adapted
from https://github.com/MhLiao/DB.
Args:
shrink_ratio (float): The area shrinked ratio between text
shrink_ratio (float): The area shrunk ratio between text
kernels and their text masks.
thr_min (float): The minimum value of the threshold map.
thr_max (float): The maximum value of the threshold map.
Expand Down
4 changes: 2 additions & 2 deletions mmocr/models/textdet/losses/db_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def forward(self, preds, downsample_ratio, gt_shrink, gt_shrink_mask,
downsample_ratio (float): The downsample ratio for the
ground truths.
gt_shrink (list[BitmapMasks]): The mask list with each element
being the shrinked text mask for one img.
being the shrunk text mask for one img.
gt_shrink_mask (list[BitmapMasks]): The effective mask list with
each element being the shrinked effective mask for one img.
each element being the shrunk effective mask for one img.
gt_thr (list[BitmapMasks]): The mask list with each element
being the threshold text mask for one img.
gt_thr_mask (list[BitmapMasks]): The effective mask list with
Expand Down
2 changes: 1 addition & 1 deletion mmocr/models/textdet/losses/pan_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def forward(self, preds, downsample_ratio, gt_kernels, gt_mask):
gt_kernels (list[BitmapMasks]): The kernel list with each element
being the text kernel mask for one img.
gt_mask (list[BitmapMasks]): The effective mask list
with each element being the effective mask fo one img.
with each element being the effective mask for one img.
downsample_ratio (float): The downsample ratio between preds
and the input img.
Expand Down
2 changes: 1 addition & 1 deletion mmocr/models/textdet/losses/pse_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def forward(self, score_maps, downsample_ratio, gt_kernels, gt_mask):
gt_kernels (list[BitmapMasks]): The kernel list with each element
being the text kernel mask for one img.
gt_mask (list[BitmapMasks]): The effective mask list
with each element being the effective mask fo one img.
with each element being the effective mask for one img.
downsample_ratio (float): The downsample ratio between score_maps
and the input img.
Expand Down
2 changes: 1 addition & 1 deletion mmocr/models/textrecog/recognizer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _parse_losses(self, losses):
Args:
losses (dict): Raw outputs of the network, which usually contain
losses and other necessary infomation.
losses and other necessary information.
Returns:
tuple[tensor, dict]: (loss, log_vars), loss is the loss tensor
Expand Down
4 changes: 2 additions & 2 deletions mmocr/utils/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def parse_args():
Path.cwd()))) and (args.det_config != ''
or args.recog_config != ''):
warnings.warn(
'config_dir will be overrided by det-config or recog-config.',
'config_dir will be overridden by det-config or recog-config.',
UserWarning)
return args

Expand Down Expand Up @@ -642,7 +642,7 @@ def _args_processing(self, args):
img_list = [args.img]

# Read all image(s) in advance to reduce wasted time
# re-reading the images for vizualisation output
# re-reading the images for visualization output
args.arrays = [mmcv.imread(x) for x in img_list]

# Create a list of filenames (used for output images and result files)
Expand Down
16 changes: 8 additions & 8 deletions tools/deployment/onnx2tensorrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,23 @@ def _prepare_input_img(imgs, test_pipeline: Iterable[dict]):
test_pipeline = replace_ImageToTensor(test_pipeline)
test_pipeline = Compose(test_pipeline)

datas = []
data = []
for img in imgs:
# prepare data
# add information into dict
data = dict(img_info=dict(filename=img), img_prefix=None)
datum = dict(img_info=dict(filename=img), img_prefix=None)

# build the data pipeline
data = test_pipeline(data)
datum = test_pipeline(datum)
# get tensor from list to stack for batch mode (text detection)
datas.append(data)
data.append(datum)

if isinstance(datas[0]['img'], list) and len(datas) > 1:
if isinstance(data[0]['img'], list) and len(data) > 1:
raise Exception('aug test does not support '
f'inference with batch size '
f'{len(datas)}')
f'{len(data)}')

data = collate(datas, samples_per_gpu=len(imgs))
data = collate(data, samples_per_gpu=len(imgs))

# process img_metas
if isinstance(data['img_metas'], list):
Expand Down Expand Up @@ -280,7 +280,7 @@ def parse_args():
assert args.workspace_size >= 0, 'Workspace size less than 0.'
for max_value, min_value in zip(args.max_shape, args.min_shape):
assert max_value >= min_value, \
'max_shape sould be larger than min shape'
'max_shape should be larger than min shape'

input_config = {
'min_shape': args.min_shape,
Expand Down
18 changes: 9 additions & 9 deletions tools/deployment/pytorch2onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,27 +95,27 @@ def _prepare_data(cfg, imgs):
cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
test_pipeline = Compose(cfg.data.test.pipeline)

datas = []
data = []
for img in imgs:
# prepare data
if is_ndarray:
# directly add img
data = dict(img=img)
datum = dict(img=img)
else:
# add information into dict
data = dict(img_info=dict(filename=img), img_prefix=None)
datum = dict(img_info=dict(filename=img), img_prefix=None)

# build the data pipeline
data = test_pipeline(data)
datum = test_pipeline(datum)
# get tensor from list to stack for batch mode (text detection)
datas.append(data)
data.append(datum)

if isinstance(datas[0]['img'], list) and len(datas) > 1:
if isinstance(data[0]['img'], list) and len(data) > 1:
raise Exception('aug test does not support '
f'inference with batch size '
f'{len(datas)}')
f'{len(data)}')

data = collate(datas, samples_per_gpu=len(imgs))
data = collate(data, samples_per_gpu=len(imgs))

# process img_metas
if isinstance(data['img_metas'], list):
Expand Down Expand Up @@ -346,7 +346,7 @@ def main():
parser.add_argument(
'--dynamic-export',
action='store_true',
help='Whether dynamicly export onnx model.',
help='Whether dynamically export onnx model.',
default=False)
args = parser.parse_args()

Expand Down

0 comments on commit 76590c5

Please sign in to comment.