Skip to content

Commit

Permalink
Check errors don't exist as valid words in the aspell dictionary (#1142)
Browse files Browse the repository at this point in the history
* Check errors don't exist as valid words in the aspell dictionary

* Install aspell on Travis

* Add some missing packages

* Remove a virtual package

* Just install the version of aspell-python we need

* Keep flake8 happy

* Switch to warnings and count them, so we can see all the aspell errors in one go

* Handle different encoding of the word and aspell

* Try and fix the encoding conversion

* Find out the encoding type

* Don't assert on number of warnings

* Don't record warnings for now

* Warn on all the encoding options

* pprint the encoding

* More warning work

* Use the actual encoding type

* Correct the logic

* ENH: Multi dict support

* FIX: Fixes after merge

* FIX: Better error check

* FIX: More thorough testing, locations

* FIX: Try newer aspell

* FIX: Move to new dict

* FIX: Move

* FIX: Restore removals from #1181

* FIX: One from #1362

* Add rare chack->check, cheque,

* Minor tidy of some dictionary check code

* Add some more suggestions.

* Fix the whitespace

* Really fix the whitespace

* FIX: Refactor requirement

* Log an error when aspell not found and not required

* Fix the error logging

* Test all variants of present and missing from aspell

* Undo some tuple tidying

* Fix the true/false values used

* Skip some flake8 tests

* Fix the test cases

* Correct the not in aspell test and fix some test cases

* Remove a duplicate test

* Use a test word that isn't a typo

* Set the ideal aspell detection logic for each dictionary

I suspect we'll have to relax this as more obscure words won't be in the aspell dictionary

* Be more realistic given the size of the dictionary

* Fix a flake8 error

* Fix another line length error

* FIX: Move

* FIX: Make visible, simplify

Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
  • Loading branch information
peternewman and larsoner authored Apr 6, 2020
1 parent 8d99c0e commit d978da6
Show file tree
Hide file tree
Showing 11 changed files with 428 additions and 243 deletions.
11 changes: 11 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# for it to be on multiple physical lines, so long as you remember: - There
# can't be any leading "-"s - All newlines will be removed, so use ";"s

dist: bionic
language: python
cache: pip
python:
Expand All @@ -12,6 +13,14 @@ python:
- 3.6
- 3.7
- 3.8
env:
REQUIRE_ASPELL=true

addons:
apt:
packages:
- libaspell-dev
- aspell-en

before_install:
- source tools/travis_tools.sh
Expand All @@ -22,6 +31,8 @@ before_install:
- python --version # just to check
- pip install -U pip wheel # upgrade to the latest pip to find 3.5 wheels; wheel to avoid errors
- retry pip install pytest pytest-cov flake8 coverage codecov chardet setuptools docutils
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then retry pip install aspell-python-py2; fi
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then retry pip install aspell-python-py3; fi
- cd $SRC_DIR

install:
Expand Down
64 changes: 52 additions & 12 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,18 @@

# Users might want to link this file into /usr/local/bin, so we resolve the
# symbolic link path to the real path if necessary.
default_dictionary = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'data', 'dictionary.txt')
_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
_builtin_dictionaries = ( # name, desc, file suffix, err in aspell, correction in aspell # noqa: E501
# The aspell tests here aren't the ideal state, but the None's are realistic
# for obscure words
('clear', 'for unambiguous errors', '', False, None),
('rare', 'for rare but valid words', '_rare', None, None),
('informal', 'for informal words', '_informal', True, True),
('code', 'for words common to code and/or mathematics', '_code', None, None), # noqa: E501
('names', 'for valid proper names that might be typos', '_names', None, None), # noqa: E501
('en-GB_to_en-US', 'for corrections from en-GB to en-US', '_en-GB_to_en-US', True, True), # noqa: E501
)
_builtin_default = 'clear,rare'

# OPTIONS:
#
Expand Down Expand Up @@ -216,11 +226,21 @@ def parse_options(args):
help='write changes in place if possible')

parser.add_argument('-D', '--dictionary',
action='append', metavar='FILE',
action='append',
help='Custom dictionary file that contains spelling '
'corrections. If this flag is not specified or '
'equals "-" then the default dictionary is used. '
'This option can be specified multiple times.')
builtin_opts = ', '.join(
'%r %s' % (d[0], d[1]) for d in _builtin_dictionaries)
parser.add_argument('--builtin',
dest='builtin', default=_builtin_default,
metavar='BUILTIN-LIST',
help='Comma-separated list of builtin dictionaries '
'to include (when "-D -" or no "-D" is passed). '
'Current options are:\n%s. The default is '
'"--builtin %s".'
% (builtin_opts, _builtin_default))
parser.add_argument('-I', '--ignore-words',
action='append', metavar='FILE',
help='File that contains words which will be ignored '
Expand Down Expand Up @@ -603,7 +623,7 @@ def main(*args):
ignore_words_files = options.ignore_words or []
ignore_words = set()
for ignore_words_file in ignore_words_files:
if not os.path.exists(ignore_words_file):
if not os.path.isfile(ignore_words_file):
print('ERROR: cannot find ignore-words file: %s' %
ignore_words_file, file=sys.stderr)
parser.print_help()
Expand All @@ -615,16 +635,36 @@ def main(*args):
for word in comma_separated_words.split(','):
ignore_words.add(word.strip())

dictionaries = options.dictionary or [default_dictionary]
misspellings = dict()
if options.dictionary:
dictionaries = options.dictionary
else:
dictionaries = ['-']
use_dictionaries = list()
for dictionary in dictionaries:
if dictionary == "-":
dictionary = default_dictionary
if not os.path.exists(dictionary):
print('ERROR: cannot find dictionary file: %s' % dictionary,
file=sys.stderr)
parser.print_help()
return 1
# figure out which builtin dictionaries to use
use = sorted(set(options.builtin.split(',')))
for u in use:
for builtin in _builtin_dictionaries:
if builtin[0] == u:
use_dictionaries.append(
os.path.join(_data_root, 'dictionary%s.txt'
% (builtin[2],)))
break
else:
print('ERROR: Unknown builtin dictionary: %s' % (u,),
file=sys.stderr)
parser.print_help()
return 1
else:
if not os.path.isfile(dictionary):
print('ERROR: cannot find dictionary file: %s' % dictionary,
file=sys.stderr)
parser.print_help()
return 1
use_dictionaries.append(dictionary)
misspellings = dict()
for dictionary in use_dictionaries:
build_dict(dictionary, misspellings, ignore_words)
colors = TermColors()
if not options.colors or sys.platform == 'win32':
Expand Down
Loading

0 comments on commit d978da6

Please sign in to comment.