Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Grazie implementation #3798

Merged
merged 11 commits into from
Dec 10, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Added

### Fixed
* Fix various issues with the Grazie implementation, in particular default rules for Grazie Pro

## [0.9.10-alpha.2] - 2024-12-05

Expand Down
2 changes: 2 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ dependencies {
bundledPlugin("tanvd.grazi")
plugin("com.firsttimeinforever.intellij.pdf.viewer.intellij-pdf-viewer:0.17.0")
plugin("com.jetbrains.hackathon.indices.viewer:1.28")
// Does not work in tests: https://youtrack.jetbrains.com/issue/GRZ-5023
// plugin("com.intellij.grazie.pro:0.3.347")
}

// Local dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@ package nl.hannahsten.texifyidea.inspections.grazie
import com.intellij.grazie.grammar.strategy.StrategyUtils
import com.intellij.grazie.text.TextContent
import com.intellij.grazie.text.TextExtractor
import com.intellij.lang.tree.util.children
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import com.intellij.psi.util.startOffset
import nl.hannahsten.texifyidea.lang.commands.Argument
import nl.hannahsten.texifyidea.lang.commands.LatexCommand
import nl.hannahsten.texifyidea.psi.*
import nl.hannahsten.texifyidea.util.merge
import nl.hannahsten.texifyidea.util.overlaps
import nl.hannahsten.texifyidea.util.parser.childrenOfType
import nl.hannahsten.texifyidea.util.parser.endOffset
import nl.hannahsten.texifyidea.util.parser.firstParentOfType
import nl.hannahsten.texifyidea.util.parser.parents
import nl.hannahsten.texifyidea.util.parser.*
import nl.hannahsten.texifyidea.util.toTextRange

/**
Expand All @@ -27,6 +26,10 @@ class LatexTextExtractor : TextExtractor() {
return null
}

return buildTextContent(root)
}

fun buildTextContent(root: LatexContent): TextContent? {
// Since Grazie works by first checking leaf elements, and if it gets null tries one level higher, we cannot return anything (e.g. literal for a command, comment for comments) other than LatexContent because then LatexContent itself will not be used as a root.
// However, we do need it as a root because we need to filter out certain things like inline math ourselves, so that we can make sure all the whitespace around ignored items is correct.
val domain = TextContent.TextDomain.PLAIN_TEXT
Expand All @@ -37,29 +40,38 @@ class LatexTextExtractor : TextExtractor() {
.map { TextContent.Exclusion.exclude(it.toTextRange()) }
.filter { it.start >= 0 && it.end <= textContent.length }

return textContent.excludeRanges(stealthyRanges)
val textToSubmit = textContent.excludeRanges(stealthyRanges)
return textToSubmit
}

/**
* Get ranges to ignore.
* Note: IntRange has an inclusive end.
*/
private fun getStealthyRanges(root: PsiElement): List<IntRange> {
fun getStealthyRanges(root: PsiElement): List<IntRange> {
// Getting text takes time, so we only do it once
val rootText = root.text

// Only keep normaltext, assuming other things (like inline math) need to be ignored.
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>())
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>() + root.childrenOfType<PsiWhiteSpace>())
.asSequence()
.filter { it.isNotInMathEnvironment() && it.isNotInSquareBrackets() }
.filter { !it.inMathContext() && it.isNotInSquareBrackets() }
// Ordering is relevant for whitespace
.sortedBy { it.startOffset }
// Always keep newlines, as they may be the only whitespace splitting consecutive commands
.filter { text -> text !is PsiWhiteSpace || text.text.contains("\n") }
// Skip arguments of non-text commands, but keep arguments of unknown commands, in particular if they are in the middle of a sentence
// Even commends which have no text as argument, for example certain reference commands like auteref, may need to be kept in to get correct punctuation
.filterNot { text -> text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type != Argument.Type.TEXT && it.type != Argument.Type.LABEL } == true }
// Environment names are never part of a sentence
.filterNot { text -> text.firstParentOfType<LatexBeginCommand>() != null || text.firstParentOfType<LatexEndCommand>() != null }
// If we encounter an unescaped &, we are in some language construct like a tabular, so we ignore this because ofter a tabular does not contain full sentences
.filter { text -> text.node.children().none { it.elementType == LatexTypes.AMPERSAND } }
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude whitespace at the start
.dropWhile { it is PsiWhiteSpace }
// Ranges that we need to keep
// Note that textRangeInParent will not be correct because that's the text range in the direct parent, not in the root
.flatMap { text ->
// Skip arguments of non-text commands
if (text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type == Argument.Type.TEXT } != true) {
return@flatMap emptyList()
}

var start = text.textRange.startOffset - root.startOffset
// If LatexNormalText starts after a newline following a command, the newline is not part of the LatexNormalText so we include it manually to make sure that it is seen as a space between sentences
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude the newline for the first normal text in the file.
Expand All @@ -71,7 +83,7 @@ class LatexTextExtractor : TextExtractor() {

// -1 Because endOffset is exclusive, but we are working with inclusive end here
var end = text.textRange.endOffset - 1 - root.startOffset
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command if the command has no arguments,
// except when the space is followed by inline math, since we ignore inline math altogether (which is probably not correct) we should also ignore the space
if (setOf(' ', '\n').contains(rootText.getOrNull(end + 1)) && rootText.getOrNull(end + 2) != '$') {
end += 1
Expand Down Expand Up @@ -100,14 +112,10 @@ class LatexTextExtractor : TextExtractor() {
ranges.removeAll(overlapped.toSet())
ranges.add(indent.merge(overlapped))
}
// This is approximately (except at the start) the text we send to Grazie
// val text = ranges.sortedBy { it.first }.flatMap { listOf(it.first, it.last) }.toMutableList().also { it.add(0, -1) }
// .chunked(2) { if (it.size > 1) rootText.substring(it[0] + 1, it[1]) else null }

return ranges.sortedBy { it.first }
}

private fun PsiElement.isNotInMathEnvironment() = parents().none { it is LatexMathEnvironment }

private fun PsiElement.isNotInSquareBrackets() = parents().find { it is LatexGroup || it is LatexOptionalParam }
?.let { it is LatexGroup } ?: true
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ enum class LatexGenericRegularCommand(
INDEXSPACE("indexspace"),
INDEX("intex", "entry".asRequired()),
IT("it"),
ITEM("item", "label".asOptional()),
ITEM("item", "label".asOptional(Argument.Type.TEXT)),
ITSHAPE("itshape"),
LABEL("label", "key".asRequired()),
LARGE("large"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import com.intellij.psi.PsiElement
import nl.hannahsten.texifyidea.lang.LatexPackage
import nl.hannahsten.texifyidea.psi.LatexCommands
import nl.hannahsten.texifyidea.psi.LatexParameterText
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
import nl.hannahsten.texifyidea.util.magic.CommandMagic
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
import nl.hannahsten.texifyidea.util.parser.requiredParameters

enum class LatexGlossariesCommand(
Expand Down Expand Up @@ -46,10 +46,10 @@ enum class LatexGlossariesCommand(
"long".asRequired(),
dependency = LatexPackage.GLOSSARIES
),
GLS("gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSUPPER("Gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSPLURAL("glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSPLURALUPPER("Glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLS("gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSUPPER("Gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSPLURAL("glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSPLURALUPPER("Glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),

LOADGLSENTRIES(
"loadglsentries",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ import com.intellij.grazie.ide.msg.GrazieStateLifecycle
import com.intellij.grazie.jlanguage.Lang
import com.intellij.grazie.remote.GrazieRemote
import com.intellij.openapi.application.ApplicationManager
import com.intellij.psi.PsiFile
import com.intellij.spellchecker.inspections.SpellCheckingInspection
import com.intellij.testFramework.fixtures.BasePlatformTestCase
import com.intellij.testFramework.fixtures.impl.CodeInsightTestFixtureImpl
import com.intellij.util.messages.Topic
import nl.hannahsten.texifyidea.file.LatexFileType
import nl.hannahsten.texifyidea.psi.LatexContent
import nl.hannahsten.texifyidea.util.parser.firstChildOfType

class GrazieInspectionTest : BasePlatformTestCase() {

Expand All @@ -28,24 +31,40 @@ class GrazieInspectionTest : BasePlatformTestCase() {
}
}

fun testCheckGrammarInConstructs() {
fun testSingleSentence() {
myFixture.configureByText(LatexFileType, """Is these an error with a sentence ${'$'}\xi${'$'} end or not.""")
myFixture.checkHighlighting()
val testName = getTestName(false)
myFixture.configureByFile("$testName.tex")
myFixture.checkHighlighting(true, false, false, true)
}

fun testMultilineCheckGrammar() {
val testName = getTestName(false)
myFixture.configureByFile("$testName.tex")
fun testCommentInText() {
myFixture.configureByText(
LatexFileType,
"""
\begin{document}
All <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or those are?">those is</GRAMMAR_ERROR> problems in the middle of a sentence.
% <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem in a comment
<GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem at the beginning of a sentence.
\end{document}
""".trimIndent()
)
myFixture.checkHighlighting(true, false, false, true)
}

fun testInlineMath() {
fun testSentenceAtEnvironmentStart() {
myFixture.configureByText(
LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?"""
LatexFileType,
"""
\begin{document}
<GRAMMAR_ERROR descr="Use An instead of 'A' if the following word starts with a vowel sound, e.g. 'an article', 'an hour'.">A</GRAMMAR_ERROR> apple a day keeps the doctor away.
Some other sentence.
\end{document}
""".trimIndent()
)
myFixture.checkHighlighting(true, false, false, true)
}

fun testInlineMath() {
myFixture.configureByText(LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?""")
myFixture.checkHighlighting()
}

Expand All @@ -61,9 +80,7 @@ class GrazieInspectionTest : BasePlatformTestCase() {
}

fun testMatchingParens() {
myFixture.configureByText(
LatexFileType, """a (in this case) . aa"""
)
myFixture.configureByText(LatexFileType, """A sentence (in this case). More sentence.""")
myFixture.checkHighlighting()
}

Expand Down Expand Up @@ -122,21 +139,79 @@ class GrazieInspectionTest : BasePlatformTestCase() {
myFixture.checkHighlighting()
}

// Broken in 2023.2 (TEX-177)
// fun testTabular() {
// GrazieRemote.download(Lang.GERMANY_GERMAN)
// GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
// myFixture.configureByText(
// LatexFileType,
// """
// \begin{tabular}{llll}
// ${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
// \multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
// \end{tabular}
//
// Ich bin über die Entwicklung sehr froh.
// """.trimIndent()
// )
// myFixture.checkHighlighting()
// }
fun testGermanGlossaries() {
GrazieRemote.download(Lang.GERMANY_GERMAN)
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
myFixture.configureByText(
LatexFileType,
"""
Der Hintergrund des Themas der Thesis ist der Umbruch beim Prozess des \gls{api}-Managements.
""".trimIndent()
)
myFixture.checkHighlighting()
}

fun testTabular() {
GrazieRemote.download(Lang.GERMANY_GERMAN)
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
myFixture.configureByText(
LatexFileType,
"""
\begin{tabular}{llll}
${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
\multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
\end{tabular}

Ich bin über die Entwicklung sehr froh.
""".trimIndent()
)
myFixture.checkHighlighting()
}

/*
* These rules are not enabled by default in Grazie Lite, but do show up by default in Grazie Pro.
* To find a rule id, search for the name in https://community.languagetool.org/rule/list and use the id together with the prefex from LangTool.globalIdPrefix
*/

fun testCommaInSentence() {
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.COMMA_PARENTHESIS_WHITESPACE")) }
myFixture.configureByText(LatexFileType, """\label{fig} Similar to the structure presented in \autoref{fig}, it is.""")
myFixture.checkHighlighting()
}

fun testCommandsInSentence() {
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.CONSECUTIVE_SPACES")) }
myFixture.configureByText(LatexFileType, """The principles of a generic \ac{PID} controller.""")
myFixture.checkHighlighting()
}

/*
* Grazie Pro
*
* These tests only test false positives in Grazie Pro (com.intellij.grazie.pro.style.StyleInspection), but that is not possible to test at the moment: https://youtrack.jetbrains.com/issue/GRZ-5023
* So we test the excluded ranges directly.
*/

/**
* Text as sent to Grazie.
*/
private fun getSubmittedText(file: PsiFile): String {
return LatexTextExtractor().buildTextContent(file.firstChildOfType(LatexContent::class)!!).toString()
}

fun testNewlinesShouldBeKept() {
val text = """
\section{First}
\section{Second}
""".trimIndent()
myFixture.configureByText(LatexFileType, text)
val submittedText = getSubmittedText(myFixture.file)
assertEquals(
"""
First
Second
""".trimIndent(),
submittedText
)
}
}

This file was deleted.

4 changes: 0 additions & 4 deletions test/resources/inspections/grazie/MultilineCheckGrammar.tex

This file was deleted.

Loading