From ac8cb3e403a448ac5ac621f25a622725e1322572 Mon Sep 17 00:00:00 2001 From: TheJackiMonster Date: Sun, 29 Mar 2020 05:07:53 +0200 Subject: [PATCH] Added support for LanguageTool via 'language_check' as advanced spellchecker --- manuskript/functions/spellchecker.py | 200 +++++++++++++++++- .../ui/highlighters/basicHighlighter.py | 84 +++++--- manuskript/ui/views/MDEditCompleter.py | 10 +- manuskript/ui/views/MDEditView.py | 6 +- manuskript/ui/views/textEditView.py | 177 +++++++++++++--- 5 files changed, 409 insertions(+), 68 deletions(-) diff --git a/manuskript/functions/spellchecker.py b/manuskript/functions/spellchecker.py index da41f0db..d3b90144 100644 --- a/manuskript/functions/spellchecker.py +++ b/manuskript/functions/spellchecker.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # --!-- coding: utf8 --!-- -import os, gzip, json, glob +import os, gzip, json, glob, re from PyQt5.QtCore import QLocale from collections import OrderedDict from manuskript.functions import writablePath @@ -28,6 +28,11 @@ symspellpy = None +try: + import language_check as languagetool +except: + languagetool = None + class Spellchecker: dictionaries = {} # In order of priority @@ -117,6 +122,17 @@ def getDictionary(dictionary): pass return None +class BasicMatch: + def __init__(self, startIndex, endIndex): + self.start = startIndex + self.end = endIndex + self.locqualityissuetype = 'misspelling' + self.replacements = [] + self.msg = '' + + def getWord(self, text): + return text[self.start:self.end] + class BasicDictionary: def __init__(self, name): self._lang = name @@ -162,12 +178,45 @@ def getDefaultDictionary(): def availableDictionaries(): raise NotImplemented + def checkText(self, text): + # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/ + WORDS = r'(?iu)((?:[^_\W]|\')+)[^A-Za-z0-9\']' + # (?iu) means case insensitive and Unicode + # ((?:[^_\W]|\')+) means words exclude underscores but include apostrophes + # [^A-Za-z0-9\'] used with above hack to prevent spellcheck while typing word + # + # See also https://stackoverflow.com/questions/2062169/regex-w-in-utf-8 + + matches = [] + + for word_object in re.finditer(WORDS, text): + word = word_object.group(1) + + if (self.isMisspelled(word) and not self.isCustomWord(word)): + matches.append(BasicMatch( + word_object.start(1), word_object.end(1) + )) + + return matches + def isMisspelled(self, word): raise NotImplemented def getSuggestions(self, word): raise NotImplemented + def findSuggestions(self, text, start, end): + if start < end: + word = text[start:end] + + if (self.isMisspelled(word) and not self.isCustomWord(word)): + match = BasicMatch(start, end) + match.replacements = self.getSuggestions(word) + + return [ match ] + + return [] + def isCustomWord(self, word): return word.lower() in self._customDict @@ -248,6 +297,9 @@ def isMisspelled(self, word): def getSuggestions(self, word): return self._dict.suggest(word) + def findSuggestions(self, text, start, end): + return [] + def isCustomWord(self, word): return self._dict.is_added(word) @@ -422,8 +474,152 @@ def removeWord(self, word): # Since 6.3.8 self._dict.delete_dictionary_entry(word) +class LanguageToolCache: + + def __init__(self, tool, text): + self._length = len(text) + self._matches = self._buildMatches(tool, text) + + def getMatches(self): + return self._matches + + def _buildMatches(self, tool, text): + matches = [] + + for match in tool.check(text): + start = match.offset + end = start + match.errorlength + + basic_match = BasicMatch(start, end) + basic_match.locqualityissuetype = match.locqualityissuetype + basic_match.replacements = match.replacements + basic_match.msg = match.msg + + matches.append(basic_match) + + return matches + + def update(self, tool, text): + if len(text) != self._length: + self._matches = self._buildMatches(tool, text) + +class LanguageToolDictionary(BasicDictionary): + + def __init__(self, name): + BasicDictionary.__init__(self, name) + + if not (self._lang and self._lang in languagetool.get_languages()): + self._lang = self.getDefaultDictionary() + + self._tool = languagetool.LanguageTool(self._lang) + self._cache = {} + + @staticmethod + def getLibraryName(): + return "LanguageCheck" + + @staticmethod + def getLibraryURL(): + return "https://pypi.org/project/language-check/" + + @staticmethod + def isInstalled(): + if languagetool is not None: + + # This check, if Java is installed, is necessary to + # make sure LanguageTool can be run without problems. + # + return (os.system('java -version') == 0) + + return False + + @staticmethod + def availableDictionaries(): + if LanguageToolDictionary.isInstalled(): + languages = list(languagetool.get_languages()) + languages.sort() + return languages + return [] + + @staticmethod + def getDefaultDictionary(): + if not LanguageToolDictionary.isInstalled(): + return None + + default_locale = languagetool.get_locale_language() + if default_locale and not default_locale in languagetool.get_languages(): + default_locale = None + + if default_locale is None: + default_locale = QLocale.system().name() + if default_locale is None: + default_locale = self.availableDictionaries()[0] + + return default_locale + + def checkText(self, text): + matches = [] + + if len(text) == 0: + return matches + + textId = hash(text) + cacheEntry = None + + if not textId in self._cache: + cacheEntry = LanguageToolCache(self._tool, text) + + self._cache[textId] = cacheEntry + else: + cacheEntry = self._cache[textId] + cacheEntry.update(self._tool, text) + + for match in cacheEntry.getMatches(): + word = match.getWord(text) + + if not (match.locqualityissuetype == 'misspelling' and self.isCustomWord(word)): + matches.append(match) + + return matches + + def isMisspelled(self, word): + if self.isCustomWord(word): + return False + + for match in self.checkText(word): + if match.locqualityissuetype == 'misspelling': + return True + + return False + + def getSuggestions(self, word): + suggestions = [] + + for match in self.checkText(word): + suggestions += match.replacements + + return suggestions + + def findSuggestions(self, text, start, end): + matches = [] + checked = self.checkText(text) + + if start == end: + # Check for containing area: + for match in checked: + if (start >= match.start and start <= match.end): + matches.append(match) + else: + # Check for overlapping area: + for match in checked: + if (match.end > start and match.start < end): + matches.append(match) + + return matches + # Register the implementations in order of priority -Spellchecker.implementations.append(EnchantDictionary) +Spellchecker.registerImplementation(EnchantDictionary) Spellchecker.registerImplementation(SymSpellDictionary) Spellchecker.registerImplementation(PySpellcheckerDictionary) +Spellchecker.registerImplementation(LanguageToolDictionary) diff --git a/manuskript/ui/highlighters/basicHighlighter.py b/manuskript/ui/highlighters/basicHighlighter.py index 362ee5a2..17ae8bd7 100644 --- a/manuskript/ui/highlighters/basicHighlighter.py +++ b/manuskript/ui/highlighters/basicHighlighter.py @@ -18,7 +18,6 @@ def __init__(self, editor): QSyntaxHighlighter.__init__(self, editor.document()) self.editor = editor - self._misspelledColor = Qt.red self._defaultBlockFormat = QTextBlockFormat() self._defaultCharFormat = QTextCharFormat() self.defaultTextColor = QColor(S.text) @@ -27,6 +26,40 @@ def __init__(self, editor): self.linkColor = QColor(S.link) self.spellingErrorColor = QColor(Qt.red) + # Matches during checking can be separated by their type (all of them listed here): + # https://languagetool.org/development/api/org/languagetool/rules/ITSIssueType.html + # + # These are the colors for actual spell-, grammar- and style-checking: + self._errorColors = { + 'addition' : QColor(255, 215, 0), # gold + 'characters' : QColor(135, 206, 235), # sky blue + 'duplication' : QColor(0, 255, 255), # cyan / aqua + 'formatting' : QColor(0, 128, 128), # teal + 'grammar' : QColor(0, 0, 255), # blue + 'inconsistency' : QColor(128, 128, 0), # olive + 'inconsistententities' : QColor(46, 139, 87), # sea green + 'internationalization' : QColor(255, 165, 0), # orange + 'legal' : QColor(255, 69, 0), # orange red + 'length' : QColor(47, 79, 79), # dark slate gray + 'localespecificcontent' : QColor(188, 143, 143),# rosy brown + 'localeviolation' : QColor(128, 0, 0), # maroon + 'markup' : QColor(128, 0, 128), # purple + 'misspelling' : QColor(255, 0, 0), # red + 'mistranslation' : QColor(255, 0, 255), # magenta / fuchsia + 'nonconformance' : QColor(255, 218, 185), # peach puff + 'numbers' : QColor(65, 105, 225), # royal blue + 'omission' : QColor(255, 20, 147), # deep pink + 'other' : QColor(138, 43, 226), # blue violet + 'patternproblem' : QColor(0, 128, 0), # green + 'register' : QColor(112,128,144), # slate gray + 'style' : QColor(0, 255, 0), # lime + 'terminology' : QColor(0, 0, 128), # navy + 'typographical' : QColor(255, 255, 0), # yellow + 'uncategorized' : QColor(128, 128, 128), # gray + 'untranslated' : QColor(210, 105, 30), # chocolate + 'whitespace' : QColor(192, 192, 192) # silver + } + def setDefaultBlockFormat(self, bf): self._defaultBlockFormat = bf self.rehighlight() @@ -36,7 +69,7 @@ def setDefaultCharFormat(self, cf): self.rehighlight() def setMisspelledColor(self, color): - self._misspelledColor = color + self._errorColors['misspelled'] = color def updateColorScheme(self, rehighlight=True): """ @@ -134,32 +167,25 @@ def highlightBlockAfter(self, text): txt.end() - txt.start(), fmt) - # Spell checking - - # Following algorithm would not check words at the end of line. - # This hacks adds a space to every line where the text cursor is not - # So that it doesn't spellcheck while typing, but still spellchecks at - # end of lines. See github's issue #166. - textedText = text - if self.currentBlock().position() + len(text) != \ - self.editor.textCursor().position(): - textedText = text + " " - - # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/ - WORDS = r'(?iu)((?:[^_\W]|\')+)[^A-Za-z0-9\']' - # (?iu) means case insensitive and Unicode - # ((?:[^_\W]|\')+) means words exclude underscores but include apostrophes - # [^A-Za-z0-9\'] used with above hack to prevent spellcheck while typing word - # - # See also https://stackoverflow.com/questions/2062169/regex-w-in-utf-8 - if hasattr(self.editor, "spellcheck") and self.editor.spellcheck: - for word_object in re.finditer(WORDS, textedText): - if (self.editor._dict - and self.editor._dict.isMisspelled(word_object.group(1))): - format = self.format(word_object.start(1)) - format.setUnderlineColor(self._misspelledColor) + if hasattr(self.editor, "spellcheck") and self.editor.spellcheck and self.editor._dict: + # Spell checking + + # Following algorithm would not check words at the end of line. + # This hacks adds a space to every line where the text cursor is not + # So that it doesn't spellcheck while typing, but still spellchecks at + # end of lines. See github's issue #166. + textedText = text + if self.currentBlock().position() + len(text) != \ + self.editor.textCursor().position(): + textedText = text + " " + + # The text should only be checked once as a whole + for match in self.editor._dict.checkText(textedText): + if match.locqualityissuetype in self._errorColors: + highlight_color = self._errorColors[match.locqualityissuetype] + + format = self.format(match.start) + format.setUnderlineColor(highlight_color) # SpellCheckUnderline fails with some fonts format.setUnderlineStyle(QTextCharFormat.WaveUnderline) - self.setFormat(word_object.start(1), - word_object.end(1) - word_object.start(1), - format) + self.setFormat(match.start, match.end - match.start, format) diff --git a/manuskript/ui/views/MDEditCompleter.py b/manuskript/ui/views/MDEditCompleter.py index e0db6808..0101238d 100644 --- a/manuskript/ui/views/MDEditCompleter.py +++ b/manuskript/ui/views/MDEditCompleter.py @@ -106,13 +106,18 @@ def popupCompleter(self): self.completer.popup(self.textUnderCursor(select=True)) def mouseMoveEvent(self, event): + """ + When mouse moves, we show tooltip when appropriate. + """ + self.beginTooltipMoveEvent() MDEditView.mouseMoveEvent(self, event) + self.endTooltipMoveEvent() onRef = [r for r in self.refRects if r.contains(event.pos())] if not onRef: qApp.restoreOverrideCursor() - QToolTip.hideText() + self.hideTooltip() return cursor = self.cursorForPosition(event.pos()) @@ -120,7 +125,8 @@ def mouseMoveEvent(self, event): if ref: if not qApp.overrideCursor(): qApp.setOverrideCursor(Qt.PointingHandCursor) - QToolTip.showText(self.mapToGlobal(event.pos()), Ref.tooltip(ref)) + + self.showTooltip(self.mapToGlobal(event.pos()), Ref.tooltip(ref)) def mouseReleaseEvent(self, event): MDEditView.mouseReleaseEvent(self, event) diff --git a/manuskript/ui/views/MDEditView.py b/manuskript/ui/views/MDEditView.py index e8eb5644..c5f4c338 100644 --- a/manuskript/ui/views/MDEditView.py +++ b/manuskript/ui/views/MDEditView.py @@ -506,13 +506,15 @@ def mouseMoveEvent(self, event): """ When mouse moves, we show tooltip when appropriate. """ + self.beginTooltipMoveEvent() textEditView.mouseMoveEvent(self, event) + self.endTooltipMoveEvent() onRect = [r for r in self.clickRects if r.rect.contains(event.pos())] if not onRect: qApp.restoreOverrideCursor() - QToolTip.hideText() + self.hideTooltip() return ct = onRect[0] @@ -534,7 +536,7 @@ def mouseMoveEvent(self, event): if tooltip: tooltip = self.tr("{} (CTRL+Click to open)").format(tooltip) - QToolTip.showText(self.mapToGlobal(event.pos()), tooltip) + self.showTooltip(self.mapToGlobal(event.pos()), tooltip) def mouseReleaseEvent(self, event): textEditView.mouseReleaseEvent(self, event) diff --git a/manuskript/ui/views/textEditView.py b/manuskript/ui/views/textEditView.py index 41324eb5..ea58ed81 100644 --- a/manuskript/ui/views/textEditView.py +++ b/manuskript/ui/views/textEditView.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # --!-- coding: utf8 --!-- -import re +import re, textwrap from PyQt5.Qt import QApplication from PyQt5.QtCore import QTimer, QModelIndex, Qt, QEvent, pyqtSignal, QRegExp, QLocale, QPersistentModelIndex, QMutex from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat, QFont, QColor, QIcon, QMouseEvent, QTextCursor -from PyQt5.QtWidgets import QWidget, QTextEdit, qApp, QAction, QMenu +from PyQt5.QtWidgets import QWidget, QTextEdit, qApp, QAction, QMenu, QToolTip from manuskript import settings from manuskript.enums import Outline, World, Character, Plot @@ -47,6 +47,8 @@ def __init__(self, parent=None, index=None, html=None, spellcheck=None, self.highlightWord = "" self.highligtCS = False self._dict = None + self._tooltip = { 'depth' : 0, 'active' : 0 } + # self.document().contentsChanged.connect(self.submit, F.AUC) # Submit text changed only after 500ms without modifications @@ -393,6 +395,49 @@ def mousePressEvent(self, event): Qt.LeftButton, Qt.LeftButton, Qt.NoModifier) QTextEdit.mousePressEvent(self, event) + def beginTooltipMoveEvent(self): + self._tooltip['depth'] += 1 + + def endTooltipMoveEvent(self): + self._tooltip['depth'] -= 1 + + def showTooltip(self, pos, text): + QToolTip.showText(pos, text) + self._tooltip['active'] = self._tooltip['depth'] + + def hideTooltip(self): + if self._tooltip['active'] == self._tooltip['depth']: + QToolTip.hideText() + + def mouseMoveEvent(self, event): + """ + When mouse moves, we show tooltip when appropriate. + """ + self.beginTooltipMoveEvent() + QTextEdit.mouseMoveEvent(self, event) + self.endTooltipMoveEvent() + + match = None + + # Check if the selected word has any suggestions for correction + if self.spellcheck and self._dict: + cursor = self.cursorForPosition(event.pos()) + + # Searches for correlating/overlapping matches + suggestions = self._dict.findSuggestions(self.toPlainText(), cursor.selectionStart(), cursor.selectionEnd()) + + if len(suggestions) > 0: + # I think it should focus on one type of error at a time. + match = suggestions[0] + + if match: + # Wrap the message into a fitting width + msg_lines = textwrap.wrap(match.msg, 48) + + self.showTooltip(event.globalPos(), "\n".join(msg_lines)) + else: + self.hideTooltip() + def wheelEvent(self, event): """ We catch wheelEvent if key modifier is CTRL to change font size. @@ -440,42 +485,108 @@ def createStandardContextMenu(self): if not self.spellcheck: return popup_menu - # Select the word under the cursor. - # But only if there is no selection (otherwise it's impossible to select more text to copy/cut) cursor = self.textCursor() - if not cursor.hasSelection(): - cursor.select(QTextCursor.WordUnderCursor) - self.setTextCursor(cursor) - - # Check if the selected word is misspelled and offer spelling - # suggestions if it is. - if self._dict and cursor.hasSelection(): - text = str(cursor.selectedText()) - valid = not self._dict.isMisspelled(text) - selectedWord = cursor.selectedText() + suggestions = [] + selectedWord = None + + # Check for any suggestions for corrections at the cursors position + if self._dict: + text = self.toPlainText() + + suggestions = self._dict.findSuggestions(text, cursor.selectionStart(), cursor.selectionEnd()) + + # Select the word under the cursor if necessary. + # But only if there is no selection (otherwise it's impossible to select more text to copy/cut) + if (not cursor.hasSelection() and len(suggestions) == 0): + cursor.select(QTextCursor.WordUnderCursor) + self.setTextCursor(cursor) + + if cursor.hasSelection(): + selectedWord = cursor.selectedText() + + # Check if the selected word is misspelled and offer spelling + # suggestions if it is. + suggestions = self._dict.findSuggestions(text, cursor.selectionStart(), cursor.selectionEnd()) + + if (len(suggestions) > 0 or selectedWord): + valid = len(suggestions) == 0 + if not valid: - spell_menu = QMenu(self.tr('Spelling Suggestions'), self) - spell_menu.setIcon(F.themeIcon("spelling")) - for word in self._dict.getSuggestions(text): - action = self.SpellAction(word, spell_menu) - action.correct.connect(self.correctWord) - spell_menu.addAction(action) + # I think it should focus on one type of error at a time. + match = suggestions[0] + popup_menu.insertSeparator(popup_menu.actions()[0]) - # Adds: add to dictionary - addAction = QAction(self.tr("&Add to dictionary"), popup_menu) - addAction.setIcon(QIcon.fromTheme("list-add")) - addAction.triggered.connect(self.addWordToDict) - addAction.setData(selectedWord) - popup_menu.insertAction(popup_menu.actions()[0], addAction) - # Only add the spelling suggests to the menu if there are - # suggestions. - if len(spell_menu.actions()) != 0: - # Adds: suggestions - popup_menu.insertMenu(popup_menu.actions()[0], spell_menu) - # popup_menu.insertSeparator(popup_menu.actions()[0]) + + if match.locqualityissuetype == 'misspelling': + spell_menu = QMenu(self.tr('Spelling Suggestions'), self) + spell_menu.setIcon(F.themeIcon("spelling")) + + if (match.end > match.start and not selectedWord): + # Select the actual area of the match + cursor = self.textCursor() + cursor.setPosition(match.start, QTextCursor.MoveAnchor); + cursor.setPosition(match.end, QTextCursor.KeepAnchor); + self.setTextCursor(cursor) + + selectedWord = cursor.selectedText() + + for word in match.replacements: + action = self.SpellAction(word, spell_menu) + action.correct.connect(self.correctWord) + spell_menu.addAction(action) + + # Adds: add to dictionary + addAction = QAction(self.tr("&Add to dictionary"), popup_menu) + addAction.setIcon(QIcon.fromTheme("list-add")) + addAction.triggered.connect(self.addWordToDict) + addAction.setData(selectedWord) + + popup_menu.insertAction(popup_menu.actions()[0], addAction) + + # Only add the spelling suggests to the menu if there are + # suggestions. + if len(match.replacements) > 0: + # Adds: suggestions + popup_menu.insertMenu(popup_menu.actions()[0], spell_menu) + else: + correct_menu = None + correct_action = None + + if (len(match.replacements) > 0 and match.end > match.start): + # Select the actual area of the match + cursor = self.textCursor() + cursor.setPosition(match.start, QTextCursor.MoveAnchor); + cursor.setPosition(match.end, QTextCursor.KeepAnchor); + self.setTextCursor(cursor) + + if len(match.replacements) > 0: + correct_menu = QMenu(self.tr('&Correction Suggestions'), self) + correct_menu.setIcon(F.themeIcon("spelling")) + + for word in match.replacements: + action = self.SpellAction(word, correct_menu) + action.correct.connect(self.correctWord) + correct_menu.addAction(action) + + if correct_menu == None: + correct_action = QAction(self.tr('&Correction Suggestion'), popup_menu) + correct_action.setIcon(F.themeIcon("spelling")) + correct_action.setEnabled(False) + + # Wrap the message into a fitting width + msg_lines = textwrap.wrap(match.msg, 48) + + # Insert the lines of the message backwards + for i in range(0, len(msg_lines)): + popup_menu.insertSection(popup_menu.actions()[0], msg_lines[len(msg_lines) - (i + 1)]) + + if correct_menu != None: + popup_menu.insertMenu(popup_menu.actions()[0], correct_menu) + else: + popup_menu.insertAction(popup_menu.actions()[0], correct_action) # If word was added to custom dict, give the possibility to remove it - elif valid and self._dict.isCustomWord(selectedWord): + elif self._dict.isCustomWord(selectedWord): popup_menu.insertSeparator(popup_menu.actions()[0]) # Adds: remove from dictionary rmAction = QAction(self.tr("&Remove from custom dictionary"), popup_menu)