diff --git a/AUTHORS b/AUTHORS index 05f6f4111da..9c42e1f1bbc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -210,3 +210,4 @@ Yang Zongze Yara Grassi Gouffon Yifan Peng Zhang Liang +Nikita Borovikov diff --git a/CHANGELOG.md b/CHANGELOG.md index 806c9ea2636..41c7dc5fbe7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - The Medline fetcher now normalizes the author names according to the BibTeX-Standard [#4345](https://github.com/JabRef/jabref/issues/4345) - We added an option on the Linked File Viewer to rename the attached file of an entry directly on the JabRef. [#4844](https://github.com/JabRef/jabref/issues/4844) - We added an option in the preference dialog box that allows user to enable helpful tooltips.[#3599](https://github.com/JabRef/jabref/issues/3599) +- We added a tool for extracting a BibTeX entry from plain text. - We moved the dropdown menu for selecting the push-application from the toolbar into the external application preferences. [#674](https://github.com/JabRef/jabref/issues/674) - We removed the alphabetical ordering of the custom tabs and updated the error message when trying to create a general field with a name containing an illegal character. [#5019](https://github.com/JabRef/jabref/issues/5019) - We added a context menu to the bib(la)tex-source-editor to copy'n'paste. 
[#5007](https://github.com/JabRef/jabref/pull/5007) diff --git a/src/main/java/org/jabref/gui/JabRefFrame.java b/src/main/java/org/jabref/gui/JabRefFrame.java index 4ce6c1c1ffc..890281a3610 100644 --- a/src/main/java/org/jabref/gui/JabRefFrame.java +++ b/src/main/java/org/jabref/gui/JabRefFrame.java @@ -54,6 +54,7 @@ import org.jabref.gui.actions.OldDatabaseCommandWrapper; import org.jabref.gui.actions.SimpleCommand; import org.jabref.gui.actions.StandardActions; +import org.jabref.gui.bibtexextractor.ExtractBibtexAction; import org.jabref.gui.auximport.NewSubLibraryAction; import org.jabref.gui.bibtexkeypattern.BibtexKeyPatternAction; import org.jabref.gui.contentselector.ManageContentSelectorAction; @@ -774,6 +775,7 @@ private MenuBar createMenu() { factory.createMenuItem(StandardActions.FIND_UNLINKED_FILES, new FindUnlinkedFilesAction(this, stateManager)), factory.createMenuItem(StandardActions.WRITE_XMP, new OldDatabaseCommandWrapper(Actions.WRITE_XMP, this, stateManager)), factory.createMenuItem(StandardActions.COPY_LINKED_FILES, new CopyFilesAction(stateManager, this.getDialogService())), + factory.createMenuItem(StandardActions.EXTRACT_BIBTEX, new ExtractBibtexAction(this)), new SeparatorMenuItem(), diff --git a/src/main/java/org/jabref/gui/actions/StandardActions.java b/src/main/java/org/jabref/gui/actions/StandardActions.java index 09f06684fdf..247c2574b11 100644 --- a/src/main/java/org/jabref/gui/actions/StandardActions.java +++ b/src/main/java/org/jabref/gui/actions/StandardActions.java @@ -138,6 +138,7 @@ public enum StandardActions implements Action { DOWNLOAD_FULL_TEXT(Localization.lang("Search full text documents online"), IconTheme.JabRefIcons.FILE_SEARCH, KeyBinding.DOWNLOAD_FULL_TEXT), CLEANUP_ENTRIES(Localization.lang("Cleanup entries"), IconTheme.JabRefIcons.CLEANUP_ENTRIES, KeyBinding.CLEANUP), SET_FILE_LINKS(Localization.lang("Automatically set file links"), KeyBinding.AUTOMATICALLY_LINK_FILES), + EXTRACT_BIBTEX(Localization.lang("Extract 
BibTeX from plain text")),
 
     HELP(Localization.lang("Online help"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP),
     HELP_KEY_PATTERNS(Localization.lang("Help on key patterns"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP),
diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java
new file mode 100644
index 00000000000..672bf4fb381
--- /dev/null
+++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java
@@ -0,0 +1,226 @@
+package org.jabref.gui.bibtexextractor;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.BiblatexEntryType;
+import org.jabref.model.entry.BiblatexEntryTypes;
+import org.jabref.model.entry.FieldName;
+
+/**
+ * Heuristically builds a {@link BibEntry} from a plain-text reference.
+ * <p>
+ * URLs, authors, year and pages are located one after another and replaced by placeholder tags;
+ * title and journal/publisher are then guessed from the text following the last author. Whatever
+ * is left of the input is stored in the comment field of the resulting entry.
+ * <p>
+ * Intermediate results are kept in instance fields, so an instance must not be used concurrently.
+ */
+public class BibtexExtractor {
+
+    private static final String AUTHOR_TAG = "[author_tag]";
+    private static final String URL_TAG = "[url_tag]";
+    private static final String YEAR_TAG = "[year_tag]";
+    private static final String PAGES_TAG = "[pages_tag]";
+    private static final String TITLE_TAG = "[title_tag]";
+    private static final String JOURNAL_TAG = "[journal_tag]";
+
+    private static final String INITIALS_GROUP = "INITIALS";
+    private static final String LASTNAME_GROUP = "LASTNAME";
+
+    private static final Pattern URL_PATTERN = Pattern.compile(
+            "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
+                    + "(([\\w\\-]+\\.)+?([\\w\\-.~]+\\/?)*"
+                    + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)",
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+
+    private static final Pattern YEAR_PATTERN = Pattern.compile(
+            "\\d{4}",
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+
+    // Matches authors written as "Lastname, I. I.".
+    // NOTE(review): CASE_INSENSITIVE may also relax the \p{Lu} checks - confirm this is intended.
+    private static final Pattern AUTHOR_LASTNAME_FIRST_PATTERN = Pattern.compile(
+            "(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+),?\\s(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})"
+                    + "\\s*(and|,|\\.)*",
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+
+    // Matches authors written as "I. I. Lastname".
+    private static final Pattern AUTHOR_INITIALS_FIRST_PATTERN = Pattern.compile(
+            "(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+)"
+                    + "\\s*(and|,|\\.)*",
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+
+    private static final Pattern PAGES_PATTERN = Pattern.compile(
+            "(p.)?\\s?\\d+(-\\d+)?",
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+
+    private static final Pattern DIGIT_PATTERN = Pattern.compile("\\d");
+
+    private final List<String> urls = new ArrayList<>();
+    private final List<String> authors = new ArrayList<>();
+    private String year = "";
+    private String pages = "";
+    private String title = "";
+    private boolean isArticle = true;
+    private String journalOrPublisher = "";
+
+    /**
+     * Extracts a bibliographic entry from the given plain text.
+     *
+     * @param input the plain-text reference to parse
+     * @return an article or book entry holding the recognized fields; the text that could not be
+     *         assigned to a field ends up in the comment field
+     */
+    public BibEntry extract(String input) {
+        resetState();
+        String inputWithoutUrls = findUrls(input);
+        String inputWithoutAuthors = findAuthors(inputWithoutUrls);
+        String inputWithoutYear = findYear(inputWithoutAuthors);
+        String inputWithoutPages = findPages(inputWithoutYear);
+        String nonParsed = findParts(inputWithoutPages);
+        return generateEntity(nonParsed);
+    }
+
+    /** Drops the results of a previous run so that an instance can be reused. */
+    private void resetState() {
+        urls.clear();
+        authors.clear();
+        year = "";
+        pages = "";
+        title = "";
+        journalOrPublisher = "";
+        isArticle = true;
+    }
+
+    private BibEntry generateEntity(String input) {
+        BiblatexEntryType type = isArticle ? BiblatexEntryTypes.ARTICLE : BiblatexEntryTypes.BOOK;
+        BibEntry extractedEntity = new BibEntry(type);
+        extractedEntity.setField(FieldName.AUTHOR, String.join(" and ", authors));
+        extractedEntity.setField(FieldName.URL, String.join(", ", urls));
+        extractedEntity.setField(FieldName.YEAR, year);
+        extractedEntity.setField(FieldName.PAGES, pages);
+        extractedEntity.setField(FieldName.TITLE, title);
+        if (isArticle) {
+            extractedEntity.setField(FieldName.JOURNAL, journalOrPublisher);
+        } else {
+            extractedEntity.setField(FieldName.PUBLISHER, journalOrPublisher);
+        }
+        // keep whatever could not be assigned to a field
+        extractedEntity.setField(FieldName.COMMENT, input);
+        return extractedEntity;
+    }
+
+    private String findUrls(String input) {
+        Matcher matcher = URL_PATTERN.matcher(input);
+        while (matcher.find()) {
+            // group 1 begins at the scheme or "www.", i.e. after the separator matched in front of it
+            urls.add(input.substring(matcher.start(1), matcher.end()));
+        }
+        return fixSpaces(matcher.replaceAll(URL_TAG));
+    }
+
+    private String findYear(String input) {
+        int currentYear = Calendar.getInstance().get(Calendar.YEAR);
+        Matcher matcher = YEAR_PATTERN.matcher(input);
+        while (matcher.find()) {
+            String yearCandidate = matcher.group();
+            int intYearCandidate = Integer.parseInt(yearCandidate);
+            if ((intYearCandidate > 1700) && (intYearCandidate <= currentYear)) {
+                year = yearCandidate;
+                return fixSpaces(input.replace(year, YEAR_TAG));
+            }
+        }
+        return input;
+    }
+
+    private String findAuthors(String input) {
+        String currentInput = findAuthorsByPattern(input, AUTHOR_LASTNAME_FIRST_PATTERN);
+        return findAuthorsByPattern(currentInput, AUTHOR_INITIALS_FIRST_PATTERN);
+    }
+
+    private String findAuthorsByPattern(String input, Pattern pattern) {
+        Matcher matcher = pattern.matcher(input);
+        while (matcher.find()) {
+            authors.add(generateAuthor(matcher.group(LASTNAME_GROUP), matcher.group(INITIALS_GROUP)));
+        }
+        return fixSpaces(matcher.replaceAll(AUTHOR_TAG));
+    }
+
+    private String generateAuthor(String lastName, String initials) {
+        // the initials group captures the whitespace following the last initial
+        return lastName + ", " + initials.trim();
+    }
+
+    private String findPages(String input) {
+        Matcher matcher = PAGES_PATTERN.matcher(input);
+        if (matcher.find()) {
+            // Use the whole match: the optional "p." prefix (group 1) does not always take part in
+            // the match, and then matcher.start(1) is -1, which is no valid substring index.
+            pages = matcher.group().trim();
+        }
+        return fixSpaces(matcher.replaceFirst(PAGES_TAG));
+    }
+
+    private String fixSpaces(String input) {
+        // NOTE(review): \p{Lt} is the Unicode "titlecase letter" category, not "uppercase letter"
+        // (\p{Lu}) - confirm which one was meant.
+        return input.replaceAll("[,.!?;:]", "$0 ")
+                    .replaceAll("\\p{Lt}", " $0")
+                    .replaceAll("\\s+", " ").trim();
+    }
+
+    private String findParts(String input) {
+        int afterAuthorsIndex = input.lastIndexOf(AUTHOR_TAG);
+        if (afterAuthorsIndex == -1) {
+            return input;
+        }
+        afterAuthorsIndex += AUTHOR_TAG.length();
+
+        List<String> lastParts = new ArrayList<>();
+        int delimiterIndex = input.lastIndexOf("//");
+        // a "//" in front of the last author cannot separate title and source
+        if (delimiterIndex >= afterAuthorsIndex) {
+            lastParts.add(input.substring(afterAuthorsIndex, delimiterIndex)
+                               .replace(YEAR_TAG, "")
+                               .replace(PAGES_TAG, ""));
+            lastParts.addAll(Arrays.asList(input.substring(delimiterIndex + 2).split(",|\\.")));
+        } else {
+            lastParts.addAll(Arrays.asList(input.substring(afterAuthorsIndex).split(",|\\.")));
+        }
+
+        // only the leading parts without digits are candidates for title and journal/publisher
+        int nonDigitParts = 0;
+        for (String part : lastParts) {
+            if (DIGIT_PATTERN.matcher(part).find()) {
+                break;
+            }
+            nonDigitParts++;
+        }
+
+        String line = input;
+        if (nonDigitParts > 0) {
+            title = lastParts.get(0).trim();
+            // String.replace returns a new string, so the result has to be assigned
+            line = tagPart(line, title, TITLE_TAG);
+        }
+        if (nonDigitParts > 1) {
+            journalOrPublisher = lastParts.get(1).trim();
+            line = tagPart(line, journalOrPublisher, JOURNAL_TAG);
+        }
+        if (nonDigitParts > 2) {
+            isArticle = false;
+        }
+        return fixSpaces(line);
+    }
+
+    /** Replaces every occurrence of {@code part} in {@code text} by {@code tag}; an empty part changes nothing. */
+    private static String tagPart(String text, String part, String tag) {
+        return part.isEmpty() ? text : text.replace(part, tag);
+    }
+}
diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java
new file mode 100644
index 00000000000..9aca54c4e0c
--- /dev/null
+++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java
@@ -0,0 +1,19 @@
+package org.jabref.gui.bibtexextractor;
+
+import org.jabref.gui.JabRefFrame;
+import org.jabref.gui.actions.SimpleCommand;
+
+public class ExtractBibtexAction extends SimpleCommand {
+
+    private final JabRefFrame jabRefFrame;
+
+    public 
ExtractBibtexAction(JabRefFrame jabRefFrame) {
+        this.jabRefFrame = jabRefFrame;
+    }
+
+    @Override
+    public void execute() {
+        ExtractBibtexDialog dlg = new ExtractBibtexDialog(jabRefFrame);
+        dlg.showAndWait();
+    }
+}
diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java
new file mode 100644
index 00000000000..a8277c9fc1d
--- /dev/null
+++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java
@@ -0,0 +1,82 @@
+package org.jabref.gui.bibtexextractor;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import javafx.scene.control.Button;
+import javafx.scene.control.ButtonBar;
+import javafx.scene.control.ButtonType;
+import javafx.scene.control.TextArea;
+import javafx.scene.control.Tooltip;
+import javafx.scene.layout.VBox;
+
+import org.jabref.Globals;
+import org.jabref.gui.JabRefFrame;
+import org.jabref.gui.util.BaseDialog;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.BiblatexEntryTypes;
+import org.jabref.model.entry.EntryType;
+
+/**
+ * GUI Dialog for the feature "Extract BibTeX from plain text".
+ * <p>
+ * The user pastes a reference into a text area; the "Extract" button hands the text to
+ * {@link BibtexExtractor} and inserts the resulting entry into the currently shown library.
+ */
+public class ExtractBibtexDialog extends BaseDialog<Void> {
+
+    private final JabRefFrame frame;
+    private TextArea textArea;
+    private Button buttonExtract;
+
+    public ExtractBibtexDialog(JabRefFrame frame) {
+        super();
+        this.setTitle(Localization.lang("Input text to parse"));
+        this.frame = frame;
+
+        initialize();
+    }
+
+    /** Builds the dialog content and wires the "Extract" button to the extraction. */
+    private void initialize() {
+        textArea = new TextArea();
+        textArea.setWrapText(true);
+        // extraction only makes sense once some text has been entered
+        textArea.textProperty()
+                .addListener((observable, oldValue, newValue) -> buttonExtract.setDisable(newValue.isEmpty()));
+
+        VBox container = new VBox(20);
+        container.getChildren().addAll(textArea);
+        container.setPrefWidth(600);
+
+        ButtonType buttonTypeGenerate = new ButtonType(Localization.lang("Extract"), ButtonBar.ButtonData.OK_DONE);
+        getDialogPane().getButtonTypes().setAll(buttonTypeGenerate, ButtonType.CANCEL);
+
+        buttonExtract = (Button) getDialogPane().lookupButton(buttonTypeGenerate);
+        buttonExtract.setTooltip(new Tooltip(Localization.lang("Starts the extraction of the BibTeX entry")));
+        buttonExtract.setDisable(true);
+        buttonExtract.setOnAction(e -> startExtraction());
+
+        getDialogPane().setContent(container);
+    }
+
+    private void startExtraction() {
+        // NOTE(review): presumably getCurrentBasePanel() yields null when no library is open - confirm
+        if (frame.getCurrentBasePanel() == null) {
+            return;
+        }
+        BibtexExtractor extractor = new BibtexExtractor();
+        BibEntry entity = extractor.extract(textArea.getText());
+        // NOTE(review): telemetry always reports ARTICLE although the extractor may produce a BOOK
+        trackNewEntry(BiblatexEntryTypes.ARTICLE);
+        frame.getCurrentBasePanel().insertEntry(entity);
+    }
+
+    private void trackNewEntry(EntryType type) {
+        Map<String, String> properties = new HashMap<>();
+        properties.put("EntryType", type.getName());
+
+        Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("NewEntry", properties, new HashMap<>()));
+    }
+}
diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
index 2d0503a5235..e2d022ee55f 100644
--- a/src/main/resources/l10n/JabRef_en.properties
+++ b/src/main/resources/l10n/JabRef_en.properties
@@ -2074,6 +2074,12 @@
 Accept\ changes=Accept changes
 Dismiss\ 
changes=Dismiss changes The\ library\ has\ been\ modified\ by\ another\ program.=The library has been modified by another program. +Extract=Extract +Extract\ BibTeX\ from\ plain\ text=Extract BibTeX from plain text +Input\ text\ to\ parse=Input text to parse +Starts\ the\ extraction\ of\ the\ BibTeX\ entry=Starts the extraction of the BibTeX entry + +Browser=Browser Execute\ command=Execute command Open\ File\ Browser=Open File Browser Use\ default\ file\ browser=Use default file browser