Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 5547 #448

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
4 changes: 0 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,6 @@ dependencies {
implementation group: 'jakarta.xml.bind', name: 'jakarta.xml.bind-api', version: '3.0.1'
implementation group: 'org.glassfish.jaxb', name: 'jaxb-runtime', version: '3.0.2'

implementation ('com.github.tomtung:latex2unicode_2.13:0.3.2') {
exclude module: 'fastparse_2.13'
}

implementation group: 'com.microsoft.azure', name: 'applicationinsights-core', version: '2.4.1'
implementation (group: 'com.microsoft.azure', name: 'applicationinsights-logging-log4j2', version: '2.4.1') {
exclude module: "log4j-core"
Expand Down
2 changes: 1 addition & 1 deletion src/jmh/java/org/jabref/benchmarks/Benchmarks.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import org.jabref.logic.exporter.BibtexDatabaseWriter;
import org.jabref.logic.exporter.SaveConfiguration;
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.layout.format.HTMLChars;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.search.SearchQuery;
import org.jabref.logic.util.OS;
import org.jabref.model.database.BibDatabase;
Expand Down
2 changes: 0 additions & 2 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@
requires jakarta.inject;
requires reactfx;
requires commons.cli;
requires com.github.tomtung.latex2unicode;
requires fastparse;
requires jbibtex;
requires citeproc.java;
requires de.saxsys.mvvmfx.validation;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.jabref.gui.util.DefaultTaskExecutor;
import org.jabref.gui.util.DroppingMouseLocation;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.groups.DefaultGroupsFactory;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.model.FieldChange;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/gui/texparser/CitationsDisplay.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import org.jabref.gui.icon.IconTheme;
import org.jabref.gui.util.ViewModelListCellFactory;
import org.jabref.model.strings.LatexToUnicodeAdapter;
import org.jabref.model.texparser.Citation;

public class CitationsDisplay extends ListView<Citation> {
Expand All @@ -44,7 +43,9 @@ private Node getDisplayGraphic(Citation item) {
}

Node citationIcon = IconTheme.JabRefIcons.LATEX_COMMENT.getGraphicNode();
Text contextText = new Text(LatexToUnicodeAdapter.format(item.getContext()));
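// FIXME: contextText is null here, so the wrappingWidthProperty().bind(...) call below throws a NullPointerException; restore the LaTeX-to-Unicode conversion of item.getContext()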
Text contextText = null;
// Text contextText = new Text(LatexToUnicodeAdapter.format(item.getContext()));
contextText.wrappingWidthProperty().bind(this.widthProperty().subtract(85));
HBox contextBox = new HBox(8, citationIcon, contextText);
contextBox.getStyleClass().add("contextBox");
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/bst/BstPreviewLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import java.util.List;

import org.jabref.logic.cleanup.ConvertToBibtexCleanup;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter;
import org.jabref.logic.layout.format.RemoveTilde;
import org.jabref.logic.preview.PreviewLayout;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.logic.citationkeypattern;

import java.math.BigInteger;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -21,6 +20,7 @@

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.formatter.Formatters;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.casechanger.Word;
import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter;
import org.jabref.model.database.BibDatabase;
Expand All @@ -32,7 +32,6 @@
import org.jabref.model.entry.field.FieldFactory;
import org.jabref.model.entry.field.InternalField;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.strings.LatexToUnicodeAdapter;
import org.jabref.model.strings.StringUtil;

import org.slf4j.Logger;
Expand Down Expand Up @@ -84,6 +83,8 @@ public class BracketedPattern {

private static final Pattern WHITESPACE = Pattern.compile("\\p{javaWhitespace}");

private static final LatexToUnicodeFormatter LATEX_TO_UNICODE_FORMATTER = new LatexToUnicodeFormatter();

private enum Institution {
SCHOOL,
DEPARTMENT,
Expand Down Expand Up @@ -513,7 +514,7 @@ public static String getFieldValue(BibEntry entry, String pattern, Character key
}

/**
* Parses the provided string to an {@link AuthorList}, which are then formatted by {@link LatexToUnicodeAdapter}.
* Parses the provided string into an {@link AuthorList} whose name parts are then formatted by {@link LatexToUnicodeFormatter}.
* Afterward, any institutions are formatted into an institution key.
*
* @param unparsedAuthors a string representation of authors or editors
Expand All @@ -526,14 +527,14 @@ private static AuthorList createAuthorList(String unparsedAuthors) {
String lastName = author.getLast()
.map(lastPart -> isInstitution(author) ?
generateInstitutionKey(lastPart) :
LatexToUnicodeAdapter.format(lastPart))
LATEX_TO_UNICODE_FORMATTER.format(lastPart))
.orElse(null);
return new Author(
author.getFirst().map(LatexToUnicodeAdapter::format).orElse(null),
author.getFirstAbbr().map(LatexToUnicodeAdapter::format).orElse(null),
author.getVon().map(LatexToUnicodeAdapter::format).orElse(null),
author.getFirst().map(LATEX_TO_UNICODE_FORMATTER::format).orElse(null),
author.getFirstAbbr().map(LATEX_TO_UNICODE_FORMATTER::format).orElse(null),
author.getVon().map(LATEX_TO_UNICODE_FORMATTER::format).orElse(null),
lastName,
author.getJr().map(LatexToUnicodeAdapter::format).orElse(null));
author.getJr().map(LATEX_TO_UNICODE_FORMATTER::format).orElse(null));
})
.collect(AuthorList.collect());
}
Expand Down Expand Up @@ -1195,14 +1196,10 @@ private static String generateInstitutionKey(String content) {

Matcher matcher = INLINE_ABBREVIATION.matcher(content);
if (matcher.find()) {
return LatexToUnicodeAdapter.format(matcher.group());
return LATEX_TO_UNICODE_FORMATTER.format(matcher.group());
}

Optional<String> unicodeFormattedName = LatexToUnicodeAdapter.parse(content);
if (unicodeFormattedName.isEmpty()) {
LOGGER.warn("{} could not be converted to unicode. This can result in an incorrect or missing institute citation key", content);
}
String result = unicodeFormattedName.orElse(Normalizer.normalize(content, Normalizer.Form.NFC));
String result = LATEX_TO_UNICODE_FORMATTER.format(content);

// Special characters can't be allowed past this point because the citation key generator might replace them with multiple mixed-case characters
result = StringUtil.replaceSpecialCharacters(result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.Optional;
import java.util.Set;

import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
import org.jabref.logic.integrity.PagesChecker;
import org.jabref.model.database.BibDatabaseContext;
Expand All @@ -18,7 +19,6 @@
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.strings.LatexToUnicodeAdapter;

import de.undercouch.citeproc.ItemDataProvider;
import de.undercouch.citeproc.bibtex.BibTeXConverter;
Expand All @@ -44,6 +44,8 @@ public class JabRefItemDataProvider implements ItemDataProvider {
private BibEntryTypesManager entryTypesManager;
private PagesChecker pagesChecker;

private LatexToUnicodeFormatter latexToUnicodeFormatter = new LatexToUnicodeFormatter();

public JabRefItemDataProvider() {
stringJsonBuilderFactory = new StringJsonBuilderFactory();
}
Expand Down Expand Up @@ -152,7 +154,7 @@ private CSLItemData bibEntryToCSLItemData(BibEntry originalBibEntry, BibDatabase
for (Field key : fields) {
bibEntry.getResolvedFieldOrAlias(key, bibDatabaseContext.getDatabase())
.map(removeNewlinesFormatter::format)
.map(LatexToUnicodeAdapter::format)
.map(latexToUnicodeFormatter::format)
.ifPresent(value -> {
if (StandardField.MONTH == key) {
// Change month from #mon# to mon because CSL does not support the former format
Expand Down
4 changes: 0 additions & 4 deletions src/main/java/org/jabref/logic/cleanup/CleanupJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,5 @@

/**
* A single cleanup operation that can be applied to a {@link BibEntry}.
* <p>
* Declared as a functional interface: {@link #cleanup(BibEntry)} is its only abstract method,
* so implementations may be supplied as lambdas or method references.
*/
@FunctionalInterface
public interface CleanupJob {

/**
* Cleans up the given entry.
*
* @param entry the entry to clean up
* @return a list of {@link FieldChange}s; presumably the changes this cleanup made to the entry
*         (NOTE(review): confirm against the implementations)
*/
List<FieldChange> cleanup(BibEntry entry);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
import org.jabref.logic.formatter.IdentityFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeDateFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeMonthFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
import org.jabref.logic.formatter.bibtexfields.OrdinalsToSuperscriptFormatter;
import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.layout.format.ReplaceUnicodeLigaturesFormatter;
import org.jabref.model.FieldChange;
import org.jabref.model.entry.BibEntry;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/formatter/Formatters.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexCleanupFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeDateFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeMonthFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter;
Expand All @@ -34,7 +35,6 @@
import org.jabref.logic.formatter.casechanger.UpperCaseFormatter;
import org.jabref.logic.formatter.minifier.MinifyNameListFormatter;
import org.jabref.logic.formatter.minifier.TruncateFormatter;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;

public class Formatters {
private static final Pattern TRUNCATE_PATTERN = Pattern.compile("\\Atruncate\\d+\\z");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package org.jabref.logic.layout.format;
package org.jabref.logic.formatter.bibtexfields;

import java.util.Map;
import java.util.Objects;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.LayoutFormatter;
import org.jabref.model.strings.LatexToUnicodeAdapter;
import org.jabref.logic.util.strings.HTMLUnicodeConversionMaps;

/**
* This formatter converts LaTeX character sequences to their equivalent Unicode characters,
* and removes other LaTeX commands without handling them.
*
* The inverse operation is {@link UnicodeToLatexFormatter}.
*/
public class LatexToUnicodeFormatter extends Formatter implements LayoutFormatter {

Expand All @@ -22,8 +27,19 @@ public String getKey() {
}

@Override
public String format(String inField) {
return LatexToUnicodeAdapter.format(inField);
public String format(String text) {
String result = Objects.requireNonNull(text);
if (result.isEmpty()) {
return result;
}

// Standard symbols
for (Map.Entry<Character, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
.entrySet()) {
result = result.replace(unicodeLatexPair.getValue(), unicodeLatexPair.getKey().toString());
}

return result;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Map;
import java.text.Normalizer;
import java.util.Objects;

import org.jabref.logic.cleanup.Formatter;
Expand All @@ -11,22 +11,36 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The inverse operation is {@link LatexToUnicodeFormatter}.
*/
public class UnicodeToLatexFormatter extends Formatter implements LayoutFormatter {

private static final Logger LOGGER = LoggerFactory.getLogger(UnicodeToLatexFormatter.class);

@Override
public String format(String text) {
String result = Objects.requireNonNull(text);

if (result.isEmpty()) {
return result;
if (Objects.requireNonNull(text).isEmpty()) {
return text;
}

// Standard symbols
for (Map.Entry<String, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
.entrySet()) {
result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue());
// normalize the unicode characters to cover more cases
String result = Normalizer.normalize(text, Normalizer.Form.NFC);

// Convert single Unicode characters to LaTeX commands
boolean changed = false;
StringBuilder stringBuilder = new StringBuilder();
for (char c : text.toCharArray()) {
String lookup = HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.get(c);
if (lookup == null) {
stringBuilder.append(c);
} else {
stringBuilder.append(lookup);
changed = true;
}
}
if (changed) {
result = stringBuilder.toString();
}

// Combining accents
Expand Down Expand Up @@ -60,7 +74,7 @@ public String format(String text) {
for (int i = 0; i <= (result.length() - 1); i++) {
int cp = result.codePointAt(i);
if (cp >= 129) {
LOGGER.warn("Unicode character not converted: " + cp);
LOGGER.warn("Unicode character not converted: {}", cp);
}
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.jabref.logic.cleanup.FieldFormatterCleanup;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.EntryBasedFetcher;
Expand All @@ -21,7 +22,6 @@
import org.jabref.logic.importer.fetcher.transformers.DefaultLuceneQueryTransformer;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/layout/LayoutEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Optional;

import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter;
import org.jabref.logic.layout.format.AuthorAbbreviator;
import org.jabref.logic.layout.format.AuthorAndToSemicolonReplacer;
Expand Down Expand Up @@ -58,7 +59,6 @@
import org.jabref.logic.layout.format.Iso690NamesAuthors;
import org.jabref.logic.layout.format.JournalAbbreviator;
import org.jabref.logic.layout.format.LastPage;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.layout.format.MarkdownFormatter;
import org.jabref.logic.layout.format.NameFormatter;
import org.jabref.logic.layout.format.NoSpaceBetweenAbbreviations;
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/jabref/logic/layout/format/XMLChars.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.util.HashMap;
import java.util.Map;

import org.jabref.logic.formatter.bibtexfields.LatexToUnicodeFormatter;
import org.jabref.logic.layout.LayoutFormatter;
import org.jabref.logic.util.strings.XmlCharsMap;

Expand Down
Loading