Skip to content

Commit

Permalink
🐛 Fix consecutive period bug
Browse files Browse the repository at this point in the history
  • Loading branch information
nipunsadvilkar committed Sep 11, 2020
1 parent fc61aef commit f0e71a5
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions pysbd/lang/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

class Common(object):

# added special case: r"[。..!!?].*" to handle intermittent dots, exclamation, etc.
SENTENCE_BOUNDARY_REGEX = r"((?:[^)])*)(?=\s?[A-Z])|「(?:[^」])*」(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|\'(?:[^\'])*[^,]\'(?=\s[A-Z])|\"(?:[^\"])*[^,]\"(?=\s[A-Z])|\“(?:[^\”])*[^,]\”(?=\s[A-Z])|[。..!!??].*|\S.*?[。..!!??ȸȹ☉☈☇☄]"
# Added special case r"[。..!!? ]{2,}" to handle intermittent dots, exclamation marks, etc.
# The trailing r"[。..!!?]" alternative handles input that is a single one of these symbols.
SENTENCE_BOUNDARY_REGEX = r"((?:[^)])*)(?=\s?[A-Z])|「(?:[^」])*」(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|\'(?:[^\'])*[^,]\'(?=\s[A-Z])|\"(?:[^\"])*[^,]\"(?=\s[A-Z])|\“(?:[^\”])*[^,]\”(?=\s[A-Z])|[。..!!?? ]{2,}|\S.*?[。..!!??ȸȹ☉☈☇☄]|[。..!!??]"

# # Rubular: http://rubular.com/r/NqCqv372Ix
QUOTATION_AT_END_OF_SENTENCE_REGEX = r'[!?\.-][\"\'“”]\s{1}[A-Z]'
Expand Down
5 changes: 3 additions & 2 deletions tests/regression/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@
you may copy it, give it away or re-use it under the terms of the this license
""",
[('This eBook is for the use of anyone anywhere at no cost\n', 0, 56),
('you may copy it, give it away or re-use it under the terms of the this license\n', 56, 135)])

('you may copy it, give it away or re-use it under the terms of the this license\n', 56, 135)]),
('#78', 'Sentence. .. Next sentence. Next next sentence.',
[('Sentence. ', 0, 10), ('.. ', 10, 13), ('Next sentence. ', 13, 28), ('Next next sentence.', 28, 47)])
]

@pytest.mark.parametrize('issue_no,text,expected_sents', TEST_ISSUE_DATA)
Expand Down

0 comments on commit f0e71a5

Please sign in to comment.