Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added basic support for LanguageTool via 'language_check' as advanced spellchecker #747

Merged
merged 1 commit into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 198 additions & 2 deletions manuskript/functions/spellchecker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# --!-- coding: utf8 --!--

import os, gzip, json, glob
import os, gzip, json, glob, re
from PyQt5.QtCore import QLocale
from collections import OrderedDict
from manuskript.functions import writablePath
Expand All @@ -28,6 +28,11 @@
symspellpy = None


# LanguageTool support is optional: when the 'language_check' package cannot
# be imported, 'languagetool' is set to None so the rest of the module can
# test for its presence.
try:
    import language_check as languagetool
except Exception:
    # Kept deliberately broad (best effort), but unlike a bare 'except:' this
    # lets KeyboardInterrupt and SystemExit propagate.
    languagetool = None

class Spellchecker:
dictionaries = {}
# In order of priority
Expand Down Expand Up @@ -117,6 +122,17 @@ def getDictionary(dictionary):
pass
return None

class BasicMatch:
    """A flagged span of text, addressed by character offsets.

    Attributes are plain public fields:
      start / end          -- offsets such that text[start:end] is the span
      locqualityissuetype  -- issue category, 'misspelling' by default
      replacements         -- list of suggested replacements (empty by default)
      msg                  -- message describing the issue (empty by default)
    """

    def __init__(self, startIndex, endIndex):
        """Create a match covering the half-open range [startIndex, endIndex)."""
        self.start, self.end = startIndex, endIndex
        # Defaults describe a plain spelling error without any suggestion.
        self.msg = ''
        self.replacements = []
        self.locqualityissuetype = 'misspelling'

    def getWord(self, text):
        """Return the part of *text* that this match covers."""
        begin, stop = self.start, self.end
        return text[begin:stop]

class BasicDictionary:
def __init__(self, name):
self._lang = name
Expand Down Expand Up @@ -162,12 +178,45 @@ def getDefaultDictionary():
def availableDictionaries():
raise NotImplemented

def checkText(self, text):
    """Return a BasicMatch for every misspelled word found in *text*.

    A word is reported when isMisspelled() accepts it and isCustomWord()
    does not. The returned matches carry the offsets of the word itself.
    """
    # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
    #
    #   (?iu)              case insensitive and Unicode
    #   ((?:[^_\W]|\')+)   a word: underscores excluded, apostrophes included
    #   [^A-Za-z0-9\']     one trailing non-word character is required, which
    #                      keeps a word that is still being typed at the very
    #                      end of the text from being checked
    #
    # See also https://stackoverflow.com/questions/2062169/regex-w-in-utf-8
    word_pattern = r'(?iu)((?:[^_\W]|\')+)[^A-Za-z0-9\']'

    def should_report(candidate):
        # Same evaluation order as a plain 'and': the custom dictionary is
        # only consulted for words already considered misspelled.
        return self.isMisspelled(candidate) and not self.isCustomWord(candidate)

    return [
        BasicMatch(found.start(1), found.end(1))
        for found in re.finditer(word_pattern, text)
        if should_report(found.group(1))
    ]

def isMisspelled(self, word):
    """Report whether *word* is misspelled; concrete dictionaries override this.

    Raises:
        NotImplementedError: always, in this placeholder implementation.
    """
    # 'NotImplemented' is the sentinel for binary-operator fallbacks, not an
    # exception class; raising it fails with an unrelated TypeError.
    raise NotImplementedError

def getSuggestions(self, word):
    """Return replacement suggestions for *word*; concrete dictionaries override this.

    Raises:
        NotImplementedError: always, in this placeholder implementation.
    """
    # 'NotImplemented' is the sentinel for binary-operator fallbacks, not an
    # exception class; raising it fails with an unrelated TypeError.
    raise NotImplementedError

def findSuggestions(self, text, start, end):
    """Return suggestions for the word at text[start:end].

    The result is a list holding a single BasicMatch (with its
    'replacements' filled from getSuggestions()) when the range is
    non-empty and the word is misspelled and not a custom word;
    otherwise the list is empty.
    """
    if not (start < end):
        return []

    selection = text[start:end]

    # Nothing to suggest for correctly spelled or user-approved words.
    if not self.isMisspelled(selection) or self.isCustomWord(selection):
        return []

    suggestion = BasicMatch(start, end)
    suggestion.replacements = self.getSuggestions(selection)
    return [suggestion]

def isCustomWord(self, word):
    """Tell whether the lower-cased form of *word* is in the custom dictionary."""
    normalized = word.lower()
    return normalized in self._customDict

Expand Down Expand Up @@ -248,6 +297,9 @@ def isMisspelled(self, word):
def getSuggestions(self, word):
    """Return whatever the wrapped dictionary's suggest() yields for *word*."""
    candidates = self._dict.suggest(word)
    return candidates

def findSuggestions(self, text, start, end):
    """Return an empty list; no range-based suggestions are produced here."""
    no_matches = []
    return no_matches

def isCustomWord(self, word):
    """Return the wrapped dictionary's is_added() answer for *word*."""
    added = self._dict.is_added(word)
    return added

Expand Down Expand Up @@ -422,8 +474,152 @@ def removeWord(self, word):
# Since 6.3.8
self._dict.delete_dictionary_entry(word)

class LanguageToolCache:
    """Matches produced by a LanguageTool-like checker for one piece of text.

    The text itself is not stored; only its length is remembered, and
    update() uses a change in length as the signal to recompute the matches.
    """

    def __init__(self, tool, text):
        """Check *text* with *tool* and keep the resulting matches.

        Args:
            tool: object whose ``check(text)`` returns an iterable of matches
                exposing ``offset``, ``errorlength``, ``locqualityissuetype``,
                ``replacements`` and ``msg``.
            text: the text to check.
        """
        self._length = len(text)
        self._matches = self._buildMatches(tool, text)

    def getMatches(self):
        """Return the list of BasicMatch objects from the last check."""
        return self._matches

    def _buildMatches(self, tool, text):
        """Run ``tool.check(text)`` and convert each result into a BasicMatch."""
        matches = []

        for match in tool.check(text):
            start = match.offset
            end = start + match.errorlength

            basic_match = BasicMatch(start, end)
            basic_match.locqualityissuetype = match.locqualityissuetype
            basic_match.replacements = match.replacements
            basic_match.msg = match.msg

            matches.append(basic_match)

        return matches

    def update(self, tool, text):
        """Recompute the matches when the length of *text* has changed."""
        if len(text) != self._length:
            self._matches = self._buildMatches(tool, text)
            # Remember the new length. Without this, every later call with
            # the same text would look "changed" again and repeat the check.
            self._length = len(text)

class LanguageToolDictionary(BasicDictionary):
    """Dictionary implementation backed by LanguageTool via 'language_check'.

    Besides spelling it reports every issue type LanguageTool returns; each
    match carries its category in 'locqualityissuetype'.
    """

    def __init__(self, name):
        """Create the checker for language *name*.

        Falls back to getDefaultDictionary() when *name* is empty or is not
        among the languages reported by language_check.
        """
        BasicDictionary.__init__(self, name)

        if not (self._lang and self._lang in languagetool.get_languages()):
            self._lang = self.getDefaultDictionary()

        self._tool = languagetool.LanguageTool(self._lang)
        # Maps hash(text) -> LanguageToolCache.
        # NOTE(review): entries are never evicted, so this grows with every
        # distinct text that is checked -- confirm whether a bound is needed.
        self._cache = {}

    @staticmethod
    def getLibraryName():
        """Return the display name of the backing library."""
        return "LanguageCheck"

    @staticmethod
    def getLibraryURL():
        """Return the URL where the backing library can be obtained."""
        return "https://pypi.org/project/language-check/"

    @staticmethod
    def isInstalled():
        """Return True when language_check was imported and Java can be run."""
        if languagetool is not None:

            # This check, if Java is installed, is necessary to
            # make sure LanguageTool can be run without problems.
            #
            return (os.system('java -version') == 0)

        return False

    @staticmethod
    def availableDictionaries():
        """Return the sorted list of supported languages, or [] if unavailable."""
        if LanguageToolDictionary.isInstalled():
            languages = list(languagetool.get_languages())
            languages.sort()
            return languages
        return []

    @staticmethod
    def getDefaultDictionary():
        """Pick a default language.

        Preference order: the locale language reported by language_check (if
        it is a supported language), then the system locale name from Qt,
        then the first available dictionary.

        Returns:
            The chosen language, or None when LanguageTool is unavailable or
            no language could be determined.
        """
        if not LanguageToolDictionary.isInstalled():
            return None

        default_locale = languagetool.get_locale_language()
        if default_locale and default_locale not in languagetool.get_languages():
            default_locale = None

        if not default_locale:
            default_locale = QLocale.system().name()

        if not default_locale:
            # This is a static method, so there is no 'self' here; go through
            # the class, and do not index into an empty list.
            available = LanguageToolDictionary.availableDictionaries()
            default_locale = available[0] if available else None

        return default_locale

    def checkText(self, text):
        """Return the matches LanguageTool reports for *text*.

        Results are cached per text. Matches of type 'misspelling' whose word
        is a custom word are filtered out; all other matches are returned.
        """
        if not text:
            return []

        textId = hash(text)
        cacheEntry = self._cache.get(textId)

        if cacheEntry is None:
            cacheEntry = LanguageToolCache(self._tool, text)
            self._cache[textId] = cacheEntry
        else:
            cacheEntry.update(self._tool, text)

        return [
            match for match in cacheEntry.getMatches()
            if not (match.locqualityissuetype == 'misspelling'
                    and self.isCustomWord(match.getWord(text)))
        ]

    def isMisspelled(self, word):
        """Return True when checking *word* yields a 'misspelling' match."""
        if self.isCustomWord(word):
            return False

        return any(match.locqualityissuetype == 'misspelling'
                   for match in self.checkText(word))

    def getSuggestions(self, word):
        """Return the replacements of all matches found in *word*, in order."""
        suggestions = []

        for match in self.checkText(word):
            suggestions.extend(match.replacements)

        return suggestions

    def findSuggestions(self, text, start, end):
        """Return the matches in *text* that touch the range [start, end).

        With an empty range (start == end) the matches containing that
        position are returned; otherwise those overlapping the range.
        """
        checked = self.checkText(text)

        if start == end:
            # Check for containing area:
            return [match for match in checked
                    if match.start <= start <= match.end]

        # Check for overlapping area:
        return [match for match in checked
                if match.end > start and match.start < end]


# Register the implementations in order of priority
# NOTE(review): EnchantDictionary is added twice below -- once by appending to
# Spellchecker.implementations directly and once via registerImplementation().
# This looks like the old and the new line of a diff both being present;
# confirm which one should remain.
Spellchecker.implementations.append(EnchantDictionary)
Spellchecker.registerImplementation(EnchantDictionary)
Spellchecker.registerImplementation(SymSpellDictionary)
Spellchecker.registerImplementation(PySpellcheckerDictionary)
Spellchecker.registerImplementation(LanguageToolDictionary)
84 changes: 55 additions & 29 deletions manuskript/ui/highlighters/basicHighlighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(self, editor):
QSyntaxHighlighter.__init__(self, editor.document())

self.editor = editor
self._misspelledColor = Qt.red
self._defaultBlockFormat = QTextBlockFormat()
self._defaultCharFormat = QTextCharFormat()
self.defaultTextColor = QColor(S.text)
Expand All @@ -27,6 +26,40 @@ def __init__(self, editor):
self.linkColor = QColor(S.link)
self.spellingErrorColor = QColor(Qt.red)

# Matches during checking can be separated by their type (all of them listed here):
# https://languagetool.org/development/api/org/languagetool/rules/ITSIssueType.html
#
# These are the colors for actual spell-, grammar- and style-checking:
self._errorColors = {
'addition' : QColor(255, 215, 0), # gold
'characters' : QColor(135, 206, 235), # sky blue
'duplication' : QColor(0, 255, 255), # cyan / aqua
'formatting' : QColor(0, 128, 128), # teal
'grammar' : QColor(0, 0, 255), # blue
'inconsistency' : QColor(128, 128, 0), # olive
'inconsistententities' : QColor(46, 139, 87), # sea green
'internationalization' : QColor(255, 165, 0), # orange
'legal' : QColor(255, 69, 0), # orange red
'length' : QColor(47, 79, 79), # dark slate gray
'localespecificcontent' : QColor(188, 143, 143),# rosy brown
'localeviolation' : QColor(128, 0, 0), # maroon
'markup' : QColor(128, 0, 128), # purple
'misspelling' : QColor(255, 0, 0), # red
'mistranslation' : QColor(255, 0, 255), # magenta / fuchsia
'nonconformance' : QColor(255, 218, 185), # peach puff
'numbers' : QColor(65, 105, 225), # royal blue
'omission' : QColor(255, 20, 147), # deep pink
'other' : QColor(138, 43, 226), # blue violet
'patternproblem' : QColor(0, 128, 0), # green
'register' : QColor(112,128,144), # slate gray
'style' : QColor(0, 255, 0), # lime
'terminology' : QColor(0, 0, 128), # navy
'typographical' : QColor(255, 255, 0), # yellow
'uncategorized' : QColor(128, 128, 128), # gray
'untranslated' : QColor(210, 105, 30), # chocolate
'whitespace' : QColor(192, 192, 192) # silver
}

def setDefaultBlockFormat(self, bf):
    """Store *bf* as the default block format, then re-run the highlighting."""
    self._defaultBlockFormat = bf
    # The new format only becomes visible after a fresh highlighting pass.
    self.rehighlight()
Expand All @@ -36,7 +69,7 @@ def setDefaultCharFormat(self, cf):
self.rehighlight()

def setMisspelledColor(self, color):
    """Set the underline color used for spelling errors.

    Args:
        color: the color to use for matches of type 'misspelling'.
    """
    self._misspelledColor = color
    # Colors are looked up by a match's issue type, which is 'misspelling'
    # for spelling errors; storing under any other key has no visible effect.
    self._errorColors['misspelling'] = color

def updateColorScheme(self, rehighlight=True):
"""
Expand Down Expand Up @@ -134,32 +167,25 @@ def highlightBlockAfter(self, text):
txt.end() - txt.start(),
fmt)

# Spell checking

# Following algorithm would not check words at the end of line.
# This hacks adds a space to every line where the text cursor is not
# So that it doesn't spellcheck while typing, but still spellchecks at
# end of lines. See github's issue #166.
textedText = text
if self.currentBlock().position() + len(text) != \
self.editor.textCursor().position():
textedText = text + " "

# Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
WORDS = r'(?iu)((?:[^_\W]|\')+)[^A-Za-z0-9\']'
# (?iu) means case insensitive and Unicode
# ((?:[^_\W]|\')+) means words exclude underscores but include apostrophes
# [^A-Za-z0-9\'] used with above hack to prevent spellcheck while typing word
#
# See also https://stackoverflow.com/questions/2062169/regex-w-in-utf-8
if hasattr(self.editor, "spellcheck") and self.editor.spellcheck:
for word_object in re.finditer(WORDS, textedText):
if (self.editor._dict
and self.editor._dict.isMisspelled(word_object.group(1))):
format = self.format(word_object.start(1))
format.setUnderlineColor(self._misspelledColor)
if hasattr(self.editor, "spellcheck") and self.editor.spellcheck and self.editor._dict:
# Spell checking

# The algorithm below would not check the word at the end of a line.
# This hack appends a space to every line unless the text cursor sits at
# its end, so that a word is not spellchecked while it is being typed but
# words at the end of lines are still checked. See GitHub issue #166.
textedText = text
if self.currentBlock().position() + len(text) != \
self.editor.textCursor().position():
textedText = text + " "

# The text should only be checked once as a whole
for match in self.editor._dict.checkText(textedText):
if match.locqualityissuetype in self._errorColors:
highlight_color = self._errorColors[match.locqualityissuetype]

format = self.format(match.start)
format.setUnderlineColor(highlight_color)
# SpellCheckUnderline fails with some fonts
format.setUnderlineStyle(QTextCharFormat.WaveUnderline)
self.setFormat(word_object.start(1),
word_object.end(1) - word_object.start(1),
format)
self.setFormat(match.start, match.end - match.start, format)
10 changes: 8 additions & 2 deletions manuskript/ui/views/MDEditCompleter.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,21 +106,27 @@ def popupCompleter(self):
self.completer.popup(self.textUnderCursor(select=True))

def mouseMoveEvent(self, event):
    """
    On mouse movement, show the reference tooltip and a pointing-hand
    cursor while hovering a reference; otherwise hide the tooltip and
    restore the cursor.
    """
    self.beginTooltipMoveEvent()
    MDEditView.mouseMoveEvent(self, event)
    self.endTooltipMoveEvent()

    hovered = [rect for rect in self.refRects if rect.contains(event.pos())]

    if hovered:
        textCursor = self.cursorForPosition(event.pos())
        reference = self.refUnderCursor(textCursor)
        if reference:
            if not qApp.overrideCursor():
                qApp.setOverrideCursor(Qt.PointingHandCursor)
            QToolTip.showText(self.mapToGlobal(event.pos()), Ref.tooltip(reference))

            self.showTooltip(self.mapToGlobal(event.pos()), Ref.tooltip(reference))
    else:
        # The pointer is not over any reference rectangle.
        qApp.restoreOverrideCursor()
        QToolTip.hideText()
        self.hideTooltip()

def mouseReleaseEvent(self, event):
MDEditView.mouseReleaseEvent(self, event)
Expand Down
Loading