Skip to content

Commit

Permalink
Added option to list only unique misspellings.
Browse files Browse the repository at this point in the history
Added the `--list-unique` command line argument, which makes a report-only run list each unique error only
once. Recurring unmatched tokens are thus lumped together as a single entry, and the line numbers on which
each misspelling occurs are shown alongside it.

The `list_unique` parameter was added to the `spell_check()`, `spell_check_file()` and `spell_check_token()`
functions.

Closes #24
  • Loading branch information
dhruvsomani committed Nov 11, 2018
1 parent 3f00214 commit db0f8dd
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 9 deletions.
50 changes: 42 additions & 8 deletions scspell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ def report_failed_check(match_desc, filename, unmatched_subtokens):

def spell_check_token(
match_desc, filename, fq_filename, file_id_ref,
dicts, ignores, report_only):
dicts, ignores, report_only, list_unique):
"""Spell check a single token.
:param match_desc: description of the token matching instance
Expand All @@ -487,9 +487,15 @@ def spell_check_token(
st for st in subtokens if len(st) > LEN_THRESHOLD and
(not dicts.match(st, filename, file_id_ref[0])) and
(st not in ignores)]

if unmatched_subtokens:
unmatched_subtokens = make_unique(unmatched_subtokens)
if report_only:
if list_unique:
return ((match_desc.get_string(),
match_desc.get_ofs() + len(token)),
True)

elif report_only:
return (report_failed_check(match_desc, filename,
unmatched_subtokens),
True)
Expand All @@ -499,12 +505,13 @@ def spell_check_token(
match_desc, filename, fq_filename, file_id_ref,
unmatched_subtokens, dicts, ignores),
True)

return (
(match_desc.get_string(), match_desc.get_ofs() + len(token)),
False)


def spell_check_file(filename, dicts, ignores, report_only, c_escapes):
def spell_check_file(filename, dicts, ignores, report_only, c_escapes, list_unique):
"""Spell check a single file.
:param filename: name of the file to check
Expand Down Expand Up @@ -544,8 +551,11 @@ def spell_check_file(filename, dicts, ignores, report_only, c_escapes):

# Search for tokens to spell-check
data = source_text
prev = 0
pos = 0
okay = True
errors = dict()

while True:
m = token_regex.search(data, pos)
if m is None:
Expand All @@ -556,13 +566,32 @@ def spell_check_file(filename, dicts, ignores, report_only, c_escapes):
# This is matching the file-id. Skip over it.
pos = m_id.end()
continue
result = spell_check_token(MatchDescriptor(data, m),
md = MatchDescriptor(data, m)
result = spell_check_token(md,
filename, fq_filename, file_id_ref,
dicts, ignores, report_only)
dicts, ignores, report_only, list_unique)
prev = pos
(data, pos) = result[0]
error_found = result[1]

if error_found:
okay = False
if list_unique:
err = data[prev:pos]
if err in errors:
errors[err].append(md.get_line_num())
else:
errors[err] = [md.get_line_num()]

if list_unique:
errors = list(errors.items())
errors.sort(key=lambda x: x[1])

print('Set of mispelled tokens:', file=sys.stderr)
for row in errors:
print('%s:[%s]: %s' % (filename,
', '.join(list(map(str, row[1]))),
row[0].lstrip()), file=sys.stderr)

# Write out the source file if it was modified
if data != source_text:
Expand Down Expand Up @@ -673,7 +702,7 @@ def find_dict_file(override_dictionary):
def spell_check(source_filenames, override_dictionary=None,
base_dicts=[],
relative_to=None, report_only=False, c_escapes=True,
test_input=False):
test_input=False, list_unique=False):
"""Run the interactive spell checker on the set of source_filenames.
If override_dictionary is provided, it shall be used as a dictionary
Expand All @@ -691,7 +720,8 @@ def spell_check(source_filenames, override_dictionary=None,
with CorporaFile(dict_file, base_dicts, relative_to) as dicts:
ignores = set()
for f in source_filenames:
if not spell_check_file(f, dicts, ignores, report_only, c_escapes):
if not spell_check_file(f, dicts, ignores, report_only, c_escapes,
list_unique):
okay = False
return okay

Expand Down Expand Up @@ -804,6 +834,9 @@ def main():
'--no-c-escapes', dest='c_escapes',
action='store_false', default=True,
help='treat \\label as label, for e.g. LaTeX')
spell_group.add_argument(
'--list-unique', dest='list_unique', action='store_true',
help='lists only unique errors in non-interactive report')

dict_group.add_argument(
'--override-dictionary', dest='override_filename',
Expand Down Expand Up @@ -949,5 +982,6 @@ def main():
args.relative_to,
args.report,
args.c_escapes,
args.test_input)
args.test_input,
args.list_unique)
return 0 if okay else 1
2 changes: 1 addition & 1 deletion scspell/_portable.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import msvcrt

def msvcrt_getch():
return msvcrt.getch()
return msvcrt.getwch()
getch = msvcrt_getch

except ImportError:
Expand Down
10 changes: 10 additions & 0 deletions scspell/data/dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ altsep
altseps
argv
bdist
bytearray
classmethod
cmdclass
codecs
commonprefix
Expand All @@ -287,12 +289,15 @@ copysign
curdir
dedent
defpath
delattr
delitem
devnull
dirnames
dirpath
distutils
divmod
dotall
dreload
eexist
endswith
enoent
Expand All @@ -313,6 +318,7 @@ expandvars
extsep
fileno
followlinks
frozenset
getatime
getattr
getctime
Expand All @@ -327,6 +333,7 @@ gettext
getvalue
getwriter
hasattr
ipython
isabs
isdir
isfile
Expand All @@ -335,14 +342,17 @@ isinstance
islink
ismount
isnan
issubclass
iteritems
kwarg
kwargs
lexists
linesep
listdir
memoryview
metavar
mkdir
nonlocal
nowait
onerror
opendir
Expand Down

0 comments on commit db0f8dd

Please sign in to comment.