diff --git a/scspell/__init__.py b/scspell/__init__.py index 4300f4e..c7047c3 100644 --- a/scspell/__init__.py +++ b/scspell/__init__.py @@ -462,8 +462,8 @@ def report_failed_check(match_desc, filename, unmatched_subtokens): def spell_check_token( - match_desc, filename, fq_filename, file_id_ref, - dicts, ignores, report_only): + match_desc, fq_filename, file_id, + dicts, ignores): """Spell check a single token. :param match_desc: description of the token matching instance @@ -485,44 +485,22 @@ def spell_check_token( subtokens = decompose_token(token) unmatched_subtokens = [ st for st in subtokens if len(st) > LEN_THRESHOLD and - (not dicts.match(st, filename, file_id_ref[0])) and + (not dicts.match(st, fq_filename, file_id)) and (st not in ignores)] - if unmatched_subtokens: - unmatched_subtokens = make_unique(unmatched_subtokens) - if report_only: - return (report_failed_check(match_desc, filename, - unmatched_subtokens), - True) - else: - return ( - handle_failed_check_interactively( - match_desc, filename, fq_filename, file_id_ref, - unmatched_subtokens, dicts, ignores), - True) - return ( - (match_desc.get_string(), match_desc.get_ofs() + len(token)), - False) + unmatched_subtokens = make_unique(unmatched_subtokens) + return unmatched_subtokens + return [] -def spell_check_file(filename, dicts, ignores, report_only, c_escapes): - """Spell check a single file. +def spell_check_str(source_text, fq_filename, dicts, ignores, c_escapes): + """Spell check an in-memory "file". - :param filename: name of the file to check + :param fq_filename: fully-qualified filename :param dicts: dictionary set against which to perform matching :type dicts: CorporaFile :param ignores: set of tokens to ignore for this session """ - fq_filename = os.path.normcase(os.path.realpath(filename)) - try: - with _util.open_with_encoding(fq_filename) as source_file: - source_text = source_file.read() - except IOError as e: - print("Error: can't read source file '{}'; " - 'skipping (reason: {})'.format(filename, e), - file=sys.stderr) - return False - # Look for a file ID file_id = None m_id = FILE_ID_REGEX.search(source_text) @@ -535,8 +513,6 @@ def spell_check_file(filename, dicts, ignores, report_only, c_escapes): else: file_id = dicts.file_id_of_file(fq_filename) - file_id_ref = [file_id] # allow for spell_check() creating a file_id - if c_escapes: token_regex = C_ESCAPE_TOKEN_REGEX else: @@ -545,7 +521,6 @@ def spell_check_file(filename, dicts, ignores, report_only, c_escapes): # Search for tokens to spell-check data = source_text pos = 0 - okay = True while True: m = token_regex.search(data, pos) if m is None: @@ -556,19 +531,61 @@ def spell_check_file(filename, dicts, ignores, report_only, c_escapes): # This is matching the file-id. Skip over it. pos = m_id.end() continue - result = spell_check_token(MatchDescriptor(data, m), - filename, fq_filename, file_id_ref, - dicts, ignores, report_only) - (data, pos) = result[0] - error_found = result[1] - if error_found: - okay = False + match_desc = MatchDescriptor(data, m) + unmatched_subtokens = spell_check_token(match_desc, + fq_filename, file_id, + dicts, ignores) + pos = match_desc.get_ofs() + len(match_desc.get_token()) + if unmatched_subtokens: + new_data = yield file_id, match_desc, unmatched_subtokens + if new_data: + data, pos = new_data + + +def spell_check_file(filename, dicts, ignores, report_only, c_escapes): + """Spell check a single file. + + :param filename: name of the file to check + :param dicts: dictionary set against which to perform matching + :type dicts: CorporaFile + :param ignores: set of tokens to ignore for this session + + """ + fq_filename = os.path.normcase(os.path.realpath(filename)) + try: + with _util.open_with_encoding(fq_filename) as source_file: + source_text = source_file.read() + except IOError as e: + print("Error: can't read source file '{}'; " + 'skipping (reason: {})'.format(filename, e), + file=sys.stderr) + return False + + okay = True + speller = spell_check_str(source_text, fq_filename, dicts, ignores, c_escapes) + new_pos = None + while True: + try: + file_id, match_desc, unmatched_subtokens = speller.send(new_pos) + except StopIteration: + break + okay = False + if report_only: + report_failed_check(match_desc, filename, + unmatched_subtokens) + else: + # HACK: Satisfy handle_failed_check_interactively API. Mutation of + # file_id is currently not handled. + file_id_ref = [file_id] + new_pos = handle_failed_check_interactively( + match_desc, filename, fq_filename, file_id_ref, + unmatched_subtokens, dicts, ignores) # Write out the source file if it was modified - if data != source_text: + if new_pos and new_pos[0] != source_text: with _util.open_with_encoding(fq_filename, mode='w') as source_file: try: - source_file.write(data) + source_file.write(new_pos[0]) except IOError as e: print(str(e), file=sys.stderr) return False