Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 11338 #698

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 16 additions & 23 deletions src/main/java/org/jabref/logic/bst/BstPreviewLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@
import java.util.List;

import org.jabref.logic.cleanup.ConvertToBibtexCleanup;
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter;
import org.jabref.logic.layout.format.RemoveTilde;
import org.jabref.logic.layout.format.LatexToHtmlFormatter;
import org.jabref.logic.preview.PreviewLayout;
import org.jabref.logic.util.StandardFileType;
import org.jabref.model.database.BibDatabaseContext;
Expand All @@ -23,6 +20,8 @@ public final class BstPreviewLayout implements PreviewLayout {

private static final Logger LOGGER = LoggerFactory.getLogger(BstPreviewLayout.class);

private final LatexToHtmlFormatter latexToHtmlFormatter = new LatexToHtmlFormatter();

private final String name;
private String source;
private BstVM bstVM;
Expand Down Expand Up @@ -59,29 +58,23 @@ public String generatePreview(BibEntry originalEntry, BibDatabaseContext databas
BibEntry entry = (BibEntry) originalEntry.clone();
new ConvertToBibtexCleanup().cleanup(entry);
String result = bstVM.render(List.of(entry));
// Remove all comments
result = result.replaceAll("%.*", "");
// Remove all LaTeX comments
// The RemoveLatexCommandsFormatter keeps the words inside latex environments. Therefore, we remove them manually
LOGGER.trace("Render result: {}", result);

// Environment not supported by SnuggleTeX. Therefore, we remove it
result = result.replace("\\begin{thebibliography}{1}", "");
result = result.replace("\\end{thebibliography}", "");
// The RemoveLatexCommandsFormatter keeps the word inside the latex command, but we want to remove that completely
result = result.replaceAll("\\\\bibitem[{].*[}]", "");

// The interesting thing is the text after \bibitem
// result = result.replaceAll(".*\\\\bibitem\\{[^}]*\\}(.*)", "\1");
result = result.replaceAll("(?s).*?\\\\bibitem\\{[^}]*\\}(.*)", "$1");

LOGGER.trace("Without \\bibitem {}", result);

// We want to replace \newblock by a space instead of completely removing it
result = result.replace("\\newblock", " ");
// remove all latex commands statements - assumption: command in a separate line
result = result.replaceAll("(?m)^\\\\.*$", "");
// remove some IEEEtran.bst output (resulting from a multiline \providecommand)
result = result.replace("#2}}", "");
// Have quotes right - and more
result = new LatexToUnicodeFormatter().format(result);
result = result.replace("``", "\"");
result = result.replace("''", "\"");
// Final cleanup
result = new RemoveNewlinesFormatter().format(result);
result = new RemoveLatexCommandsFormatter().format(result);
result = new RemoveTilde().format(result);
result = result.trim().replaceAll(" +", " ");

result = latexToHtmlFormatter.format(result);

return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public class LatexIntegrityChecker implements EntryChecker {
private static final ResourceBundle ERROR_MESSAGES = ENGINE.getPackages().get(0).getErrorMessageBundle();
private static final Set<ErrorCode> EXCLUDED_ERRORS = new HashSet<>();

// if something changes here, please also adapt org.jabref.logic.layout.format.LatexToHtmlFormatter
static {
SnugglePackage snugglePackage = ENGINE.getPackages().get(0);
snugglePackage.addComplexCommand("textgreater", false, 0, TEXT_MODE_ONLY, null, null, null);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package org.jabref.logic.layout.format;

import java.io.IOException;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.LayoutFormatter;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.ed.ph.snuggletex.InputError;
import uk.ac.ed.ph.snuggletex.SnuggleEngine;
import uk.ac.ed.ph.snuggletex.SnuggleInput;
import uk.ac.ed.ph.snuggletex.SnugglePackage;
import uk.ac.ed.ph.snuggletex.SnuggleSession;
import uk.ac.ed.ph.snuggletex.WebPageOutputOptions;

import static uk.ac.ed.ph.snuggletex.definitions.Globals.TEXT_MODE_ONLY;

/**
 * Converts LaTeX markup to HTML using SnuggleTeX.
 * <p>
 * All instances share one statically configured {@link SnuggleSession}, which is reset at the start of each
 * {@link #format(String)} call.
 * NOTE(review): the shared session is accessed without synchronization - confirm that callers never format concurrently.
 */
public class LatexToHtmlFormatter extends Formatter implements LayoutFormatter {

    private static final Logger LOGGER = LoggerFactory.getLogger(LatexToHtmlFormatter.class);

    private static final SnuggleEngine ENGINE = new SnuggleEngine();
    private static final SnuggleSession SESSION;

    // Code adapted from org.jabref.logic.integrity.LatexIntegrityChecker
    static {
        // Register additional text-mode commands on the engine's first package before the shared session is created
        SnugglePackage snugglePackage = ENGINE.getPackages().get(0);
        snugglePackage.addComplexCommand("textgreater", false, 0, TEXT_MODE_ONLY, null, null, null);
        snugglePackage.addComplexCommand("textless", false, 0, TEXT_MODE_ONLY, null, null, null);
        snugglePackage.addComplexCommand("textbackslash", false, 0, TEXT_MODE_ONLY, null, null, null);
        snugglePackage.addComplexCommand("textbar", false, 0, TEXT_MODE_ONLY, null, null, null);
        // ENGINE.getPackages().get(0).addComplexCommandOneArg()
        // engine.getPackages().get(0).addComplexCommandOneArg("text", false, ALL_MODES,LR, StyleDeclarationInterpretation.NORMALSIZE, null, TextFlowContext.ALLOW_INLINE);

        SESSION = ENGINE.createSession();
        SESSION.getConfiguration().setFailingFast(true);
    }

    @Override
    public String getName() {
        return Localization.lang("LaTeX to HTML");
    }

    @Override
    public String getKey() {
        return "latex_to_html";
    }

    /**
     * Converts the given LaTeX text to HTML.
     * <p>
     * Every {@code \providecommand} is rewritten to {@code \newcommand} before parsing.
     *
     * @param latexInput the LaTeX text to convert
     * @return the web page string built by SnuggleTeX; the (rewritten) input if reading it raised an
     *         {@link IOException}; or {@code "Error: "} followed by the first reported error if the session
     *         recorded any errors
     */
    @Override
    public String format(String latexInput) {
        // Drop the state left behind by the previous call on the shared session
        SESSION.reset();

        String normalizedInput = latexInput.replace("\\providecommand", "\\newcommand");
        LOGGER.trace("Parsing {}", normalizedInput);

        try {
            SESSION.parseInput(new SnuggleInput(normalizedInput));
        } catch (IOException e) {
            LOGGER.error("Error at parsing", e);
            return normalizedInput;
        }

        WebPageOutputOptions webPageOutputOptions = new WebPageOutputOptions();
        webPageOutputOptions.setHtml5(true);
        String result = SESSION.buildWebPageString(webPageOutputOptions);

        if (!SESSION.getErrors().isEmpty()) {
            InputError error = SESSION.getErrors().getFirst();
            // The placeholder is required: without it SLF4J silently discards the argument
            LOGGER.error("Error at parsing: {}", error);
            return "Error: " + error;
        }

        return result;
    }

    @Override
    public String getDescription() {
        return Localization.lang("Converts LaTeX encoding to HTML.");
    }

    @Override
    public String getExampleInput() {
        return "M{\\\"{o}}nch";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ public String getKey() {

@Override
public String format(String inField) {
    // The underlying formatter cannot handle the round-brace delimiters \( and \), so both are rewritten to $
    String dollarDelimited = inField.replace("\\(", "$").replace("\\)", "$");
    return LatexToUnicodeAdapter.format(dollarDelimited);
}

Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/tinylog.properties
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ exception = strip: jdk.internal
level@org.jabref.gui.maintable.PersistenceVisualStateTable = debug

level@org.jabref.http.server.Server = debug

level@org.jabref.logic.layout.format.LatexToHtmlFormatter = trace
48 changes: 44 additions & 4 deletions src/test/java/org/jabref/logic/bst/BstPreviewLayoutTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.nio.file.Path;

import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand All @@ -11,7 +10,6 @@
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.mock;

class BstPreviewLayoutTest {

Expand All @@ -22,7 +20,6 @@ public void generatePreviewForSimpleEntryUsingAbbr() throws Exception {
BstPreviewLayout bstPreviewLayout = new BstPreviewLayout(Path.of(BstPreviewLayoutTest.class.getResource("abbrv.bst").toURI()));
BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "Oliver Kopp")
.withField(StandardField.TITLE, "Thoughts on Development");
BibDatabase bibDatabase = mock(BibDatabase.class);
String preview = bstPreviewLayout.generatePreview(entry, bibDatabaseContext);
assertEquals("O. Kopp. Thoughts on development.", preview);
}
Expand All @@ -33,7 +30,6 @@ public void monthMayIsCorrectlyRendered() throws Exception {
BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "Oliver Kopp")
.withField(StandardField.TITLE, "Thoughts on Development")
.withField(StandardField.MONTH, "#May#");
BibDatabase bibDatabase = mock(BibDatabase.class);
String preview = bstPreviewLayout.generatePreview(entry, bibDatabaseContext);
assertEquals("O. Kopp. Thoughts on development, May.", preview);
}
Expand All @@ -45,6 +41,50 @@ public void generatePreviewForSliceTheoremPaperUsingAbbr() throws Exception {
assertEquals("T. Diez. Slice theorem for fréchet group actions and covariant symplectic field theory. May 2014.", preview);
}

@Test
public void generatePreviewForUnicodeUsingAbbr() throws Exception {
    // Style under test: abbrv.bst, resolved relative to this test class
    Path styleFile = Path.of(BstPreviewLayoutTest.class.getResource("abbrv.bst").toURI());
    BstPreviewLayout layout = new BstPreviewLayout(styleFile);
    // The author's family name starts with the LaTeX command {\O}
    BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "{\\O}ie, Gunvor");

    String actual = layout.generatePreview(entry, bibDatabaseContext);

    assertEquals("G. Øie.", actual);
}

@Test
public void generatePreviewForUnicodeNameUsingIeee() throws Exception {
    // Style under test: IEEEtran.bst, loaded from the system class path
    Path styleFile = Path.of(ClassLoader.getSystemResource("bst/IEEEtran.bst").toURI());
    BstPreviewLayout layout = new BstPreviewLayout(styleFile);
    // The author's family name starts with the LaTeX command {\O}
    BibEntry entry = new BibEntry().withField(StandardField.AUTHOR, "{\\O}ie, Gunvor");

    String actual = layout.generatePreview(entry, bibDatabaseContext);

    assertEquals("G. Øie.", actual);
}

@Test
public void generatePreviewForUnicodeTitleUsingIeee() throws Exception {
    // Style under test: IEEEtran.bst, loaded from the system class path
    Path styleFile = Path.of(ClassLoader.getSystemResource("bst/IEEEtran.bst").toURI());
    BstPreviewLayout layout = new BstPreviewLayout(styleFile);
    // The title mixes brace-protected acronyms with inline math written using \( ... \)
    String title = "Linear programming design of semi-digital {FIR} filter and {\\(\\Sigma\\)}{\\(\\Delta\\)} modulator for {VDSL2} transmitter";
    BibEntry entry = new BibEntry().withField(StandardField.TITLE, title);

    String actual = layout.generatePreview(entry, bibDatabaseContext);

    assertEquals("Linear programming design of semi-digital FIR filter and σδ modulator for VDSL2 transmitter", actual);
}

@Test
public void generatePreviewForComplexEntryUsingIeee() throws Exception {
    // Style under test: IEEEtran.bst, loaded from the system class path
    Path styleFile = Path.of(ClassLoader.getSystemResource("bst/IEEEtran.bst").toURI());
    BstPreviewLayout layout = new BstPreviewLayout(styleFile);

    // Only the citation key and the book title are set; the remaining fields are currently disabled
    BibEntry complexEntry = new BibEntry(StandardEntryType.InProceedings)
            .withCitationKey("DBLP:conf/iscas/SadeghifarWG14")
            // .withField(StandardField.AUTHOR, "Mohammad Reza Sadeghifar and J. Jacob Wikner and Oscar Gustafsson")
            // .withField(StandardField.TITLE, "Linear programming design of semi-digital {FIR} filter and {\\(\\Sigma\\)}{\\(\\Delta\\)} modulator for {VDSL2} transmitter")
            .withField(StandardField.BOOKTITLE, "{IEEE} International Symposium on Circuits and Systems, {ISCAS} 2014, Melbourne, Victoria, Australia, June 1-5, 2014");
            // .withField(StandardField.PAGES, "2465--2468")
            // .withField(StandardField.PUBLISHER, "{IEEE}")
            // .withField(StandardField.YEAR, "2014")
            // .withField(StandardField.URL, "https://doi.org/10.1109/ISCAS.2014.6865672")
            // .withField(StandardField.DOI, "10.1109/ISCAS.2014.6865672")
            // .withField(StandardField.TIMESTAMP, "Sat, 05 Sep 2020 18:07:30 +0200")
            // .withField(new UnknownField("biburl"), "https://dblp.org/rec/conf/iscas/SadeghifarWG14.bib")
            // .withField(new UnknownField("bibsource"), "dblp computer science bibliography, https://dblp.org");

    String actual = layout.generatePreview(complexEntry, bibDatabaseContext);

    // NOTE(review): the expected text reads like the entry's title, but the TITLE field is commented out above - confirm the intended expectation
    assertEquals("Linear programming design of semi-digital FIR filter and σδ modulator for VDSL2 transmitter", actual);
}

@Test
public void generatePreviewForSliceTheoremPaperUsingIEEE() throws Exception {
BstPreviewLayout bstPreviewLayout = new BstPreviewLayout(Path.of(ClassLoader.getSystemResource("bst/IEEEtran.bst").toURI()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package org.jabref.logic.layout.format;

import org.jsoup.Jsoup;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Parameterized tests for {@link LatexToHtmlFormatter}.
 * <p>
 * Each CSV row has the form {@code displayName, expected, input}. The formatter output is parsed with Jsoup and
 * only the document body is compared against the expected value.
 */
class LatexToHtmlFormatterTest {

    private final LatexToHtmlFormatter formatter = new LatexToHtmlFormatter();

    /**
     * Compares the HTML markup of the generated body with the expected markup.
     *
     * @param name     display name of the case; consumed only by the {@code name = "{0}"} pattern of the annotation
     * @param expected expected inner HTML of the body
     * @param input    LaTeX text handed to the formatter
     */
    @ParameterizedTest(name = "{0}")
    // Non-working conversions were filed at https://github.com/davemckain/snuggletex/issues/7
    @CsvSource({
            "plainFormat, aaa, aaa",
            "formatUmlautLi, ä, {\\\"{a}}",
            "formatUmlautCa, Ä, {\\\"{A}}",
            // "formatUmlautLi, ı, \\i",
            // "formatUmlautCi, ı, {\\i}",
            "unknownCommandToSpan, '<span class=\"mbox\">-</span>', '\\mbox{-}'",
            "formatTextit, <i>text</i>, \\textit{text}",
            "escapedDollarSign, $, \\$",
            "curlyBracesAreRemoved, test, {test}",
            "curlyBracesAreRemovedInLongerText, a longer test there, a longer {test} there",
            "longConference, 'IEEE International Symposium on Circuits and Systems, ISCAS 2014, Melbourne, Victoria, Australia, June 1-5, 2014', '{IEEE} International Symposium on Circuits and Systems, {ISCAS} 2014, Melbourne, Victoria, Australia, June 1-5, 2014'",
            "longLatexedConferenceKeepsLatexCommands, 'in <em>IEEE International Symposium on Circuits and Systems, ISCAS 2014, Melbourne, Victoria, Australia, June 1-5, 2014.</em>', 'in \\emph{{IEEE} International Symposium on Circuits and Systems, {ISCAS} 2014, Melbourne, Victoria, Australia, June 1-5, 2014.}'",
            "formatExample, Mönch, Mönch",
            "iWithDiaresisAndUnnecessaryBraces, ï, {\\\"{i}}",
            "upperCaseIWithDiaresis, Ï, \\\"{I}",
            // "polishName, Łęski, \\L\\k{e}ski",
            // "doubleCombiningAccents, ώ, $\\acute{\\omega}$", // disabled, because not supported by SnuggleTeX yet - see https://github.com/davemckain/snuggletex/issues/5
            // "combiningAccentsCase1, ḩ, {\\c{h}}",
            // "ignoreUnknownCommandWithoutArgument, '', \\aaaa",
            // "ignoreUnknownCommandWithArgument, '', \\aaaa{bbbb}",
            // "removeUnknownCommandWithEmptyArgument, '', \\aaaa{}",
            // "sWithCaron, Š, {\\v{S}}",
            // "iWithDiaresisAndEscapedI, ı̈, \\\"{\\i}",
            "tildeN, Montaña, Monta\\~{n}a",
            // "acuteNLongVersion, Maliński, Mali\\'{n}ski",
            // "acuteNLongVersion, MaliŃski, Mali\\'{N}ski",
            // "acuteNShortVersion, Maliński, Mali\\'nski",
            // "acuteNShortVersion, MaliŃski, Mali\\'Nski",
            "apostrophN, Mali’nski, Mali'nski",
            // Display name made unique so that a failure report identifies the exact row
            "apostrophUpperCaseN, Mali’Nski, Mali'Nski",
            "apostrophO, L’oscillation, L'oscillation",
            "apostrophC, O’Connor, O'Connor",
            // (wrong LaTeX) "preservationOfSingleUnderscore, Lorem ipsum_lorem ipsum, Lorem ipsum_lorem ipsum",
            // (wrong LaTeX) "conversionOfUnderscoreWithBraces, Lorem ipsum_(lorem ipsum), Lorem ipsum_{lorem ipsum}",
            // "conversionOfOrdinal1st, 1ˢᵗ, 1\\textsuperscript{st}",
            // "conversionOfOrdinal2nd, 2ⁿᵈ, 2\\textsuperscript{nd}",
            // "conversionOfOrdinal3rd, 3ʳᵈ, 3\\textsuperscript{rd}",
            // "conversionOfOrdinal4th, 4ᵗʰ, 4\\textsuperscript{th}",
            // "conversionOfOrdinal9th, 9ᵗʰ, 9\\textsuperscript{th}",
            // "unicodeNames, 'Øie, Gunvor', '{\\O}ie, Gunvor'"
    })
    void formatterTest(String name, String expected, String input) {
        String htmlResult = formatter.format(input);
        String result = Jsoup.parse(htmlResult).body().html();
        assertEquals(expected, result);
    }

    /**
     * Compares the plain text of the generated body with the expected text, so markup produced for math is ignored.
     *
     * @param name     display name of the case; consumed only by the {@code name = "{0}"} pattern of the annotation
     * @param expected expected text content of the body
     * @param input    LaTeX text handed to the formatter
     */
    @ParameterizedTest(name = "{0}")
    @CsvSource({"equationsSingleSymbol, σ, $\\sigma$",
            "equationsMoreComplicatedFormatting, A 32 mA ΣΔ -modulator, A 32~{mA} {$\\Sigma\\Delta$}-modulator",
            "equationsMoreComplicatedFormattingSigmaDeltaBraceVariant, Σ Δ, {\\(\\Sigma\\)}{\\(\\Delta\\)}",
            "equationsMoreComplicatedFormattingSigmaDeltaDollarVariant, Σ Δ, {{$\\Sigma$}}{{$\\Delta$}}",
            "longTitle, Linear programming design of semi-digital FIR filter and Σ Δ modulator for VDSL2 transmitter, Linear programming design of semi-digital {FIR} filter and {\\(\\Sigma\\)}{\\(\\Delta\\)} modulator for {VDSL2} transmitter",
            "chi, χ, $\\chi$",
            "iWithDiaresis, ï, \\\"{i}"
    })
    void math(String name, String expected, String input) {
        String htmlResult = formatter.format(input);
        String result = Jsoup.parse(htmlResult).body().text();
        assertEquals(expected, result);
    }
}
Loading