From 1ecd42fd9b655e255718103e3b9020211692b979 Mon Sep 17 00:00:00 2001 From: Nikita Borovikov Date: Wed, 17 Apr 2019 20:22:51 +0300 Subject: [PATCH 01/14] Added menu item and empty window --- src/main/java/org/jabref/gui/JabRefFrame.java | 2 ++ .../jabref/gui/actions/StandardActions.java | 1 + .../bibtexextractor/ExtractBibtexAction.java | 19 +++++++++++ .../bibtexextractor/ExtractBibtexDialog.java | 33 +++++++++++++++++++ src/main/resources/l10n/JabRef_en.properties | 2 ++ 5 files changed, 57 insertions(+) create mode 100644 src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java create mode 100644 src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java diff --git a/src/main/java/org/jabref/gui/JabRefFrame.java b/src/main/java/org/jabref/gui/JabRefFrame.java index 06ab62cb461..57d581b17ad 100644 --- a/src/main/java/org/jabref/gui/JabRefFrame.java +++ b/src/main/java/org/jabref/gui/JabRefFrame.java @@ -80,6 +80,7 @@ import org.jabref.gui.actions.ShowPreferencesAction; import org.jabref.gui.actions.SimpleCommand; import org.jabref.gui.actions.StandardActions; +import org.jabref.gui.bibtexextractor.ExtractBibtexAction; import org.jabref.gui.dialogs.AutosaveUIManager; import org.jabref.gui.edit.ManageKeywordsAction; import org.jabref.gui.edit.MassSetFieldsAction; @@ -828,6 +829,7 @@ private MenuBar createMenu() { factory.createMenuItem(StandardActions.FIND_UNLINKED_FILES, new FindUnlinkedFilesAction(this)), factory.createMenuItem(StandardActions.WRITE_XMP, new OldDatabaseCommandWrapper(Actions.WRITE_XMP, this, Globals.stateManager)), factory.createMenuItem(StandardActions.COPY_LINKED_FILES, new CopyFilesAction(this)), + factory.createMenuItem(StandardActions.EXTRACT_BIBTEX, new ExtractBibtexAction(this)), new SeparatorMenuItem(), diff --git a/src/main/java/org/jabref/gui/actions/StandardActions.java b/src/main/java/org/jabref/gui/actions/StandardActions.java index 2c91ff5c4b5..ee6230940db 100644 --- 
a/src/main/java/org/jabref/gui/actions/StandardActions.java +++ b/src/main/java/org/jabref/gui/actions/StandardActions.java @@ -137,6 +137,7 @@ public enum StandardActions implements Action { DOWNLOAD_FULL_TEXT(Localization.lang("Search full text documents online"), IconTheme.JabRefIcons.FILE_SEARCH, KeyBinding.DOWNLOAD_FULL_TEXT), CLEANUP_ENTRIES(Localization.lang("Cleanup entries"), IconTheme.JabRefIcons.CLEANUP_ENTRIES, KeyBinding.CLEANUP), SET_FILE_LINKS(Localization.lang("Automatically set file links"), KeyBinding.AUTOMATICALLY_LINK_FILES), + EXTRACT_BIBTEX(Localization.lang("Extract BibTeX from plain text")), HELP(Localization.lang("Online help"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP), HELP_KEY_PATTERNS(Localization.lang("Help on key patterns"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP), diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java new file mode 100644 index 00000000000..9aca54c4e0c --- /dev/null +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java @@ -0,0 +1,19 @@ +package org.jabref.gui.bibtexextractor; + +import org.jabref.gui.JabRefFrame; +import org.jabref.gui.actions.SimpleCommand; + +public class ExtractBibtexAction extends SimpleCommand { + + private final JabRefFrame jabRefFrame; + + public ExtractBibtexAction(JabRefFrame jabRefFrame) { + this.jabRefFrame = jabRefFrame; + } + + @Override + public void execute() { + ExtractBibtexDialog dlg = new ExtractBibtexDialog(jabRefFrame); + dlg.showAndWait(); + } +} diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java new file mode 100644 index 00000000000..c66d4d5b295 --- /dev/null +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java @@ -0,0 +1,33 @@ +package org.jabref.gui.bibtexextractor; + +import javafx.scene.control.TextArea; +import 
javafx.scene.control.TextField; +import javafx.scene.layout.VBox; +import org.jabref.gui.DialogService; +import org.jabref.gui.JabRefFrame; +import org.jabref.gui.util.BaseDialog; +import org.jabref.logic.l10n.Localization; +import org.jabref.model.database.BibDatabaseContext; + +/** + * GUI Dialog for the feature "Extract BibTeX from plain text". + */ +public class ExtractBibtexDialog extends BaseDialog { + + private final JabRefFrame frame; + private TextArea textArea; + + public ExtractBibtexDialog(JabRefFrame frame) { + super(); + this.setTitle(Localization.lang("Input text to parse")); + this.frame = frame; + + initialize(); + } + + private void initialize(){ + VBox container = new VBox(20); + container.setPrefWidth(600); + getDialogPane().setContent(container); + } +} diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 0258d204ee7..91fd1dee7d4 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -2080,7 +2080,9 @@ Open\ entry\ editor=Open entry editor Previous\ citation\ style=Previous citation style Search\ document\ identifier\ online=Search document identifier online Search\ for\ unlinked\ local\ files=Search for unlinked local files +Extract\ BibTeX\ from\ plain\ text= Extract BibTeX from plain text Search\ full\ text\ documents\ online=Search full text documents online +Input\ text\ for\ parse=Input text to parse Find\ and\ replace=Find and replace Found\ documents\:=Found documents\: From f1d9191eafb4425051beb013abf9ee92902d50be Mon Sep 17 00:00:00 2001 From: Nikita Borovikov Date: Tue, 23 Apr 2019 16:55:39 +0300 Subject: [PATCH 02/14] Added creation of new BibTeX entity without any parsing(constant debug content) --- .../gui/bibtexextractor/BibtexExtractor.java | 16 +++++++ .../bibtexextractor/ExtractBibtexDialog.java | 47 +++++++++++++++++-- src/main/resources/l10n/JabRef_en.properties | 7 ++- 3 files changed, 64 insertions(+), 6 
deletions(-) create mode 100644 src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java new file mode 100644 index 00000000000..f4f1267a09e --- /dev/null +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java @@ -0,0 +1,16 @@ +package org.jabref.gui.bibtexextractor; + +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BiblatexEntryTypes; +import org.jabref.model.entry.FieldName; + +public class BibtexExtractor { + + public BibEntry Extract(String input){ + BibEntry extractedEntity = new BibEntry(BiblatexEntryTypes.ARTICLE); + extractedEntity.setField(FieldName.TITLE, "title"); + extractedEntity.setField(FieldName.ABSTRACT, "all the rest"); + extractedEntity.setField(FieldName.YEAR, "2020"); + return extractedEntity; + } +} diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java index c66d4d5b295..b432549990b 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java @@ -1,13 +1,17 @@ package org.jabref.gui.bibtexextractor; -import javafx.scene.control.TextArea; -import javafx.scene.control.TextField; +import javafx.scene.control.*; import javafx.scene.layout.VBox; -import org.jabref.gui.DialogService; +import org.jabref.Globals; import org.jabref.gui.JabRefFrame; import org.jabref.gui.util.BaseDialog; import org.jabref.logic.l10n.Localization; -import org.jabref.model.database.BibDatabaseContext; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BiblatexEntryTypes; +import org.jabref.model.entry.EntryType; + +import java.util.HashMap; +import java.util.Map; /** * GUI Dialog for the feature "Extract BibTeX from plain text". 
@@ -16,6 +20,7 @@ public class ExtractBibtexDialog extends BaseDialog { private final JabRefFrame frame; private TextArea textArea; + private Button buttonExtract; public ExtractBibtexDialog(JabRefFrame frame) { super(); @@ -26,8 +31,42 @@ public ExtractBibtexDialog(JabRefFrame frame) { } private void initialize(){ + textArea = new TextArea(); + textArea.setWrapText(true); + textArea.textProperty() + .addListener((observable, oldValue, newValue) -> buttonExtract.setDisable(newValue.isEmpty())); + VBox container = new VBox(20); + container.getChildren().addAll( + textArea); container.setPrefWidth(600); + + ButtonType buttonTypeGenerate = new ButtonType(Localization.lang("Extract"), ButtonBar.ButtonData.OK_DONE); + getDialogPane().getButtonTypes().setAll( + buttonTypeGenerate, + ButtonType.CANCEL + ); + + buttonExtract = (Button) getDialogPane().lookupButton(buttonTypeGenerate); + buttonExtract.setTooltip(new Tooltip((Localization.lang("Starts the extraction of the BibTeX entry")))); + buttonExtract.setDisable(true); + buttonExtract.setOnAction(e -> startExtraction()); + getDialogPane().setContent(container); } + + private void startExtraction() + { + BibtexExtractor extractor = new BibtexExtractor(); + BibEntry entity = extractor.Extract(textArea.getText()); + trackNewEntry(BiblatexEntryTypes.ARTICLE); + frame.getCurrentBasePanel().insertEntry(entity); + } + + private void trackNewEntry(EntryType type) { + Map properties = new HashMap<>(); + properties.put("EntryType", type.getName()); + + Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("NewEntry", properties, new HashMap<>())); + } } diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 91fd1dee7d4..5abe13ec95c 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -2080,9 +2080,7 @@ Open\ entry\ editor=Open entry editor Previous\ citation\ style=Previous citation style Search\ 
document\ identifier\ online=Search document identifier online Search\ for\ unlinked\ local\ files=Search for unlinked local files -Extract\ BibTeX\ from\ plain\ text= Extract BibTeX from plain text Search\ full\ text\ documents\ online=Search full text documents online -Input\ text\ for\ parse=Input text to parse Find\ and\ replace=Find and replace Found\ documents\:=Found documents\: @@ -2090,3 +2088,8 @@ Use\ selected\ document=Use selected document Accept\ changes=Accept changes Dismiss\ changes=Dismiss changes The\ library\ has\ been\ modified\ by\ another\ program.=The library has been modified by another program. + +Extract=Extract +Extract\ BibTeX\ from\ plain\ text= Extract BibTeX from plain text +Input\ text\ for\ parse=Input text to parse +Starts\ the\ extraction\ of\ the\ BibTeX\ entry=Starts the extraction of the BibTeX entry From ba8edc5af665bde5a2dda8440314f641a3edb2b0 Mon Sep 17 00:00:00 2001 From: Nikita Borovikov Date: Mon, 6 May 2019 08:58:37 +0300 Subject: [PATCH 03/14] Added part of parsing --- .../gui/bibtexextractor/BibtexExtractor.java | 90 +++++++++++++++++-- .../bibtexextractor/ExtractBibtexDialog.java | 2 +- 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java index f4f1267a09e..30359cd363e 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java @@ -3,14 +3,94 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BiblatexEntryTypes; import org.jabref.model.entry.FieldName; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class BibtexExtractor { - public BibEntry Extract(String input){ + private final static String INITIALS_GROUP = "INITIALS"; + private final static String LASTNAME_GROUP = "LASTNAME"; + + 
private ArrayList urls = new ArrayList<>(); + private ArrayList authors = new ArrayList<>(); + private String year = new String(); + + private static final Pattern urlPattern = Pattern.compile( + "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)" + + "(([\\w\\-]+\\.)+?([\\w\\-.~]+\\/?)*" + + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + + private static final Pattern yearPattern = Pattern.compile( + "\\d{4}"); + + private static final Pattern authorPattern1 = Pattern.compile( + "(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+),?\\s(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})" + + "\\s*(and|,|\\.)*"); + + private static final Pattern authorPattern2 = Pattern.compile( + "(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+)" + + "\\s*(and|,|\\.)*"); + + public BibEntry extract(String input){ + String inputWithoutUrls = findUrls(input); + String inputWithoutAuthors = findAuthors(inputWithoutUrls); + String inputWithoutYear = findYear(inputWithoutAuthors); + return GenerateEntity(inputWithoutYear); + } + + private BibEntry GenerateEntity(String input){ BibEntry extractedEntity = new BibEntry(BiblatexEntryTypes.ARTICLE); - extractedEntity.setField(FieldName.TITLE, "title"); - extractedEntity.setField(FieldName.ABSTRACT, "all the rest"); - extractedEntity.setField(FieldName.YEAR, "2020"); - return extractedEntity; + extractedEntity.setField(FieldName.AUTHOR, String.join(" and ", authors)); + extractedEntity.setField(FieldName.COMMENT, input); + extractedEntity.setField(FieldName.URL, String.join(", ", urls)); + extractedEntity.setField(FieldName.YEAR, year); + return extractedEntity; + } + + private String findUrls(String input){ + Matcher matcher = urlPattern.matcher(input); + while (matcher.find()) { + urls.add(input.substring(matcher.start(1), matcher.end())); + } + return fixSpaces(matcher.replaceAll("[url_tag]")); + } + + private String findYear(String input){ + Matcher matcher 
= yearPattern.matcher(input); + while (matcher.find()){ + String yearCandidate = input.substring(matcher.start(), matcher.end()); + Integer intYearCandidate = Integer.parseInt(yearCandidate); + if (intYearCandidate > 1700 && intYearCandidate <= Calendar.getInstance().get(Calendar.YEAR)){ + year = yearCandidate; + return fixSpaces(input.replace(year, "[year_tag]")); + } + } + return input; + } + + private String findAuthors(String input){ + String currentInput = findAuthorsByPattern(input, authorPattern1); + return findAuthorsByPattern(currentInput, authorPattern2); + } + + private String findAuthorsByPattern(String input, Pattern pattern){ + Matcher matcher = pattern.matcher(input); + while (matcher.find()) { + authors.add(GenerateAuthor(matcher.group(LASTNAME_GROUP), matcher.group(INITIALS_GROUP))); + } + return fixSpaces(matcher.replaceAll("[author_tag]")); + } + + private String GenerateAuthor(String lastName, String initials){ + return lastName + ", " + initials; + } + + private String fixSpaces(String input){ + return input.replaceAll("[,.!?;:]", "$0 ") + .replaceAll("\\p{Lt}", " $0") + .replaceAll("\\s+", " ").trim(); } } diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java index b432549990b..a8277c9fc1d 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.java @@ -58,7 +58,7 @@ private void initialize(){ private void startExtraction() { BibtexExtractor extractor = new BibtexExtractor(); - BibEntry entity = extractor.Extract(textArea.getText()); + BibEntry entity = extractor.extract(textArea.getText()); trackNewEntry(BiblatexEntryTypes.ARTICLE); frame.getCurrentBasePanel().insertEntry(entity); } From 47b30343ec1169c572d5aa82f44521aee90051c8 Mon Sep 17 00:00:00 2001 From: Nikita Borovikov Date: Mon, 20 May 2019 11:22:35 +0300 Subject: [PATCH 04/14] Added last 
parsing part --- .../gui/bibtexextractor/BibtexExtractor.java | 103 ++++++++++++++++-- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java index 30359cd363e..672bf4fb381 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java @@ -1,21 +1,36 @@ package org.jabref.gui.bibtexextractor; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BiblatexEntryType; import org.jabref.model.entry.BiblatexEntryTypes; import org.jabref.model.entry.FieldName; + +import java.lang.reflect.Array; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; import java.util.regex.Matcher; import java.util.regex.Pattern; public class BibtexExtractor { + private final static String authorTag = "[author_tag]"; + private final static String urlTag = "[url_tag]"; + private final static String yearTag = "[year_tag]"; + private final static String pagesTag = "[pages_tag]"; + private final static String titleTag = "[title_tag]"; + private final static String journalTag = "[journal_tag]"; + private final static String INITIALS_GROUP = "INITIALS"; private final static String LASTNAME_GROUP = "LASTNAME"; private ArrayList urls = new ArrayList<>(); private ArrayList authors = new ArrayList<>(); private String year = new String(); + private String pages = new String(); + private String title = new String(); + private boolean isArticle = true; + private String journalOrPublisher = new String(); private static final Pattern urlPattern = Pattern.compile( "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)" + @@ -24,29 +39,48 @@ public class BibtexExtractor { Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern yearPattern = Pattern.compile( - "\\d{4}"); + "\\d{4}", + Pattern.CASE_INSENSITIVE | 
Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern authorPattern1 = Pattern.compile( "(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+),?\\s(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})" + - "\\s*(and|,|\\.)*"); + "\\s*(and|,|\\.)*", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern authorPattern2 = Pattern.compile( "(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+)" + - "\\s*(and|,|\\.)*"); + "\\s*(and|,|\\.)*", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + + private static final Pattern pagesPattern = Pattern.compile( + "(p.)?\\s?\\d+(-\\d+)?", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + public BibEntry extract(String input){ String inputWithoutUrls = findUrls(input); String inputWithoutAuthors = findAuthors(inputWithoutUrls); String inputWithoutYear = findYear(inputWithoutAuthors); - return GenerateEntity(inputWithoutYear); + String inputWithoutPages = findPages(inputWithoutYear); + String nonparsed = findParts(inputWithoutPages); + return GenerateEntity(nonparsed); } private BibEntry GenerateEntity(String input){ - BibEntry extractedEntity = new BibEntry(BiblatexEntryTypes.ARTICLE); + BiblatexEntryType type = isArticle ? 
BiblatexEntryTypes.ARTICLE : BiblatexEntryTypes.BOOK; + BibEntry extractedEntity = new BibEntry(type); extractedEntity.setField(FieldName.AUTHOR, String.join(" and ", authors)); - extractedEntity.setField(FieldName.COMMENT, input); extractedEntity.setField(FieldName.URL, String.join(", ", urls)); extractedEntity.setField(FieldName.YEAR, year); + extractedEntity.setField(FieldName.PAGES, pages); + extractedEntity.setField(FieldName.TITLE, title); + if (isArticle){ + extractedEntity.setField(FieldName.JOURNAL, journalOrPublisher); + } + else { + extractedEntity.setField(FieldName.PUBLISHER, journalOrPublisher); + } + extractedEntity.setField(FieldName.COMMENT, input); return extractedEntity; } @@ -55,7 +89,7 @@ private String findUrls(String input){ while (matcher.find()) { urls.add(input.substring(matcher.start(1), matcher.end())); } - return fixSpaces(matcher.replaceAll("[url_tag]")); + return fixSpaces(matcher.replaceAll(urlTag)); } private String findYear(String input){ @@ -65,7 +99,7 @@ private String findYear(String input){ Integer intYearCandidate = Integer.parseInt(yearCandidate); if (intYearCandidate > 1700 && intYearCandidate <= Calendar.getInstance().get(Calendar.YEAR)){ year = yearCandidate; - return fixSpaces(input.replace(year, "[year_tag]")); + return fixSpaces(input.replace(year, yearTag)); } } return input; @@ -81,16 +115,67 @@ private String findAuthorsByPattern(String input, Pattern pattern){ while (matcher.find()) { authors.add(GenerateAuthor(matcher.group(LASTNAME_GROUP), matcher.group(INITIALS_GROUP))); } - return fixSpaces(matcher.replaceAll("[author_tag]")); + return fixSpaces(matcher.replaceAll(authorTag)); } private String GenerateAuthor(String lastName, String initials){ return lastName + ", " + initials; } + private String findPages(String input){ + Matcher matcher = pagesPattern.matcher(input); + if (matcher.find()){ + pages = input.substring(matcher.start(1), matcher.end()); + } + return fixSpaces(matcher.replaceFirst(pagesTag)); + } + 
private String fixSpaces(String input){ return input.replaceAll("[,.!?;:]", "$0 ") .replaceAll("\\p{Lt}", " $0") .replaceAll("\\s+", " ").trim(); } + + private String findParts(String input) + { + ArrayList lastParts = new ArrayList<>(); + String line = input; + int afterAuthorsIndex = input.lastIndexOf(authorTag); + if (afterAuthorsIndex == -1){ + return input; + } + else { + afterAuthorsIndex += authorTag.length(); + } + int delimiterIndex = input.lastIndexOf("//"); + if (delimiterIndex != -1){ + lastParts.add(input.substring(afterAuthorsIndex, delimiterIndex) + .replace(yearTag, "") + .replace(pagesTag, "")); + lastParts.addAll(Arrays.asList(input.substring(delimiterIndex + 2).split(",|\\."))); + } + + else { + lastParts.addAll(Arrays.asList(input.substring(afterAuthorsIndex).split(",|\\."))); + } + int nonDigitParts = 0; + for (String part: lastParts) { + if (part.matches(".*\\d.*")){ + break; + } + nonDigitParts++; + } + if (nonDigitParts > 0){ + title = lastParts.get(0); + line.replace(title, titleTag); + } + if (nonDigitParts > 1){ + journalOrPublisher = lastParts.get(1); + line.replace(journalOrPublisher, journalTag); + } + if (nonDigitParts > 2){ + isArticle = false; + } + return fixSpaces(line); + } } From 6dd5e6bb6b2faba6123b8f3f559c4d4949c8ff6d Mon Sep 17 00:00:00 2001 From: Nikita Borovikov Date: Mon, 20 May 2019 11:37:24 +0300 Subject: [PATCH 05/14] Fixes for pull request --- AUTHORS | 1 + CHANGELOG.md | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 05f6f4111da..9c42e1f1bbc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -210,3 +210,4 @@ Yang Zongze Yara Grassi Gouffon Yifan Peng Zhang Liang +Nikita Borovikov diff --git a/CHANGELOG.md b/CHANGELOG.md index a64512c0723..7077a7ab602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - We changed the title of Group Dialog to "Add subgroup" from "Edit group" when we select Add subgroup option. 
- We enable import button only if entries are selected. [#4755](https://github.com/JabRef/jabref/issues/4755) - We made modifications to improve contrast of UI elements. [#4583](https://github.com/JabRef/jabref/issues/4583) +- We add tool for extracting BibTeX entity from plain text ### Fixed - We fixed an issue where corresponding groups are sometimes not highlighted when clicking on entries [#3112](https://github.com/JabRef/jabref/issues/3112) From aa8fb26d857da16b2bdda8b2cb7cbe5f341cfc7f Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Tue, 13 Aug 2019 06:17:42 +0200 Subject: [PATCH 06/14] Remove conflict marker --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19673174149..ad8434d9b4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,8 +67,6 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - We added an option in the preference dialog box that allows user to enable helpful tooltips.[#3599](https://github.com/JabRef/jabref/issues/3599) - We add tool for extracting BibTeX entity from plain text ->>>>>>> upstream/master - ### Fixed - We fixed an issue where JabRef died silently for the user without enough inotify instances [#4874](https://github.com/JabRef/jabref/issues/4847) - We fixed an issue where corresponding groups are sometimes not highlighted when clicking on entries [#3112](https://github.com/JabRef/jabref/issues/3112) From 0327051be9175509c72b13fafed69cbf66cd7d58 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Fri, 23 Aug 2019 23:20:30 +0200 Subject: [PATCH 07/14] Bibtexextractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup from ä4985 --- src/main/java/org/jabref/gui/JabRefFrame.java | 2 +- .../gui/bibtexextractor/BibtexExtractor.java | 121 +++++++++--------- .../BibtexExtractorViewModel.java | 44 +++++++ .../bibtexextractor/ExtractBibtexAction.java | 12 +- .../bibtexextractor/ExtractBibtexDialog.fxml | 15 +++ 
.../bibtexextractor/ExtractBibtexDialog.java | 80 +++++------- 6 files changed, 154 insertions(+), 120 deletions(-) create mode 100644 src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java create mode 100644 src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.fxml diff --git a/src/main/java/org/jabref/gui/JabRefFrame.java b/src/main/java/org/jabref/gui/JabRefFrame.java index c3556aa7802..c59d4c26b0c 100644 --- a/src/main/java/org/jabref/gui/JabRefFrame.java +++ b/src/main/java/org/jabref/gui/JabRefFrame.java @@ -772,7 +772,7 @@ private MenuBar createMenu() { factory.createMenuItem(StandardActions.FIND_UNLINKED_FILES, new FindUnlinkedFilesAction(this, stateManager)), factory.createMenuItem(StandardActions.WRITE_XMP, new OldDatabaseCommandWrapper(Actions.WRITE_XMP, this, stateManager)), factory.createMenuItem(StandardActions.COPY_LINKED_FILES, new CopyFilesAction(stateManager, this.getDialogService())), - factory.createMenuItem(StandardActions.EXTRACT_BIBTEX, new ExtractBibtexAction(this)), + factory.createMenuItem(StandardActions.EXTRACT_BIBTEX, new ExtractBibtexAction(stateManager)), new SeparatorMenuItem(), diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java index 672bf4fb381..30ca3d9efe3 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractor.java @@ -1,17 +1,16 @@ package org.jabref.gui.bibtexextractor; -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.BiblatexEntryType; -import org.jabref.model.entry.BiblatexEntryTypes; -import org.jabref.model.entry.FieldName; - -import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.EntryType; 
+import org.jabref.model.entry.StandardEntryType; +import org.jabref.model.entry.field.StandardField; + public class BibtexExtractor { private final static String authorTag = "[author_tag]"; @@ -24,8 +23,8 @@ public class BibtexExtractor { private final static String INITIALS_GROUP = "INITIALS"; private final static String LASTNAME_GROUP = "LASTNAME"; - private ArrayList urls = new ArrayList<>(); - private ArrayList authors = new ArrayList<>(); + private final ArrayList urls = new ArrayList<>(); + private final ArrayList authors = new ArrayList<>(); private String year = new String(); private String pages = new String(); private String title = new String(); @@ -33,58 +32,56 @@ public class BibtexExtractor { private String journalOrPublisher = new String(); private static final Pattern urlPattern = Pattern.compile( - "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)" + - "(([\\w\\-]+\\.)+?([\\w\\-.~]+\\/?)*" + - "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)", - Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)" + + "(([\\w\\-]+\\.)+?([\\w\\-.~]+\\/?)*" + + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern yearPattern = Pattern.compile( - "\\d{4}", - Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + "\\d{4}", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern authorPattern1 = Pattern.compile( - "(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+),?\\s(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})" + - "\\s*(and|,|\\.)*", - Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + "(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+),?\\s(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})" + + "\\s*(and|,|\\.)*", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern authorPattern2 = Pattern.compile( - "(?<" + INITIALS_GROUP + 
">(\\p{Lu}\\.\\s){1,2})(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+)" + - "\\s*(and|,|\\.)*", - Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + "(?<" + INITIALS_GROUP + ">(\\p{Lu}\\.\\s){1,2})(?<" + LASTNAME_GROUP + ">\\p{Lu}\\w+)" + + "\\s*(and|,|\\.)*", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final Pattern pagesPattern = Pattern.compile( - "(p.)?\\s?\\d+(-\\d+)?", - Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); - + "(p.)?\\s?\\d+(-\\d+)?", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); - public BibEntry extract(String input){ + public BibEntry extract(String input) { String inputWithoutUrls = findUrls(input); String inputWithoutAuthors = findAuthors(inputWithoutUrls); String inputWithoutYear = findYear(inputWithoutAuthors); String inputWithoutPages = findPages(inputWithoutYear); String nonparsed = findParts(inputWithoutPages); - return GenerateEntity(nonparsed); + return generateEntity(nonparsed); } - private BibEntry GenerateEntity(String input){ - BiblatexEntryType type = isArticle ? BiblatexEntryTypes.ARTICLE : BiblatexEntryTypes.BOOK; + private BibEntry generateEntity(String input) { + EntryType type = isArticle ? 
StandardEntryType.Article : StandardEntryType.Book; BibEntry extractedEntity = new BibEntry(type); - extractedEntity.setField(FieldName.AUTHOR, String.join(" and ", authors)); - extractedEntity.setField(FieldName.URL, String.join(", ", urls)); - extractedEntity.setField(FieldName.YEAR, year); - extractedEntity.setField(FieldName.PAGES, pages); - extractedEntity.setField(FieldName.TITLE, title); - if (isArticle){ - extractedEntity.setField(FieldName.JOURNAL, journalOrPublisher); + extractedEntity.setField(StandardField.AUTHOR, String.join(" and ", authors)); + extractedEntity.setField(StandardField.URL, String.join(", ", urls)); + extractedEntity.setField(StandardField.YEAR, year); + extractedEntity.setField(StandardField.PAGES, pages); + extractedEntity.setField(StandardField.TITLE, title); + if (isArticle) { + extractedEntity.setField(StandardField.JOURNAL, journalOrPublisher); + } else { + extractedEntity.setField(StandardField.PUBLISHER, journalOrPublisher); } - else { - extractedEntity.setField(FieldName.PUBLISHER, journalOrPublisher); - } - extractedEntity.setField(FieldName.COMMENT, input); - return extractedEntity; + extractedEntity.setField(StandardField.COMMENT, input); + return extractedEntity; } - private String findUrls(String input){ + private String findUrls(String input) { Matcher matcher = urlPattern.matcher(input); while (matcher.find()) { urls.add(input.substring(matcher.start(1), matcher.end())); @@ -92,12 +89,12 @@ private String findUrls(String input){ return fixSpaces(matcher.replaceAll(urlTag)); } - private String findYear(String input){ + private String findYear(String input) { Matcher matcher = yearPattern.matcher(input); - while (matcher.find()){ + while (matcher.find()) { String yearCandidate = input.substring(matcher.start(), matcher.end()); Integer intYearCandidate = Integer.parseInt(yearCandidate); - if (intYearCandidate > 1700 && intYearCandidate <= Calendar.getInstance().get(Calendar.YEAR)){ + if ((intYearCandidate > 1700) && 
(intYearCandidate <= Calendar.getInstance().get(Calendar.YEAR))) { year = yearCandidate; return fixSpaces(input.replace(year, yearTag)); } @@ -105,12 +102,12 @@ private String findYear(String input){ return input; } - private String findAuthors(String input){ + private String findAuthors(String input) { String currentInput = findAuthorsByPattern(input, authorPattern1); return findAuthorsByPattern(currentInput, authorPattern2); } - private String findAuthorsByPattern(String input, Pattern pattern){ + private String findAuthorsByPattern(String input, Pattern pattern) { Matcher matcher = pattern.matcher(input); while (matcher.find()) { authors.add(GenerateAuthor(matcher.group(LASTNAME_GROUP), matcher.group(INITIALS_GROUP))); @@ -118,40 +115,38 @@ private String findAuthorsByPattern(String input, Pattern pattern){ return fixSpaces(matcher.replaceAll(authorTag)); } - private String GenerateAuthor(String lastName, String initials){ + private String GenerateAuthor(String lastName, String initials) { return lastName + ", " + initials; } - private String findPages(String input){ + private String findPages(String input) { Matcher matcher = pagesPattern.matcher(input); - if (matcher.find()){ - pages = input.substring(matcher.start(1), matcher.end()); + if (matcher.find()) { + pages = input.substring(matcher.start(), matcher.end()); } return fixSpaces(matcher.replaceFirst(pagesTag)); } - private String fixSpaces(String input){ + private String fixSpaces(String input) { return input.replaceAll("[,.!?;:]", "$0 ") .replaceAll("\\p{Lt}", " $0") .replaceAll("\\s+", " ").trim(); } - private String findParts(String input) - { + private String findParts(String input) { ArrayList lastParts = new ArrayList<>(); String line = input; int afterAuthorsIndex = input.lastIndexOf(authorTag); - if (afterAuthorsIndex == -1){ + if (afterAuthorsIndex == -1) { return input; - } - else { + } else { afterAuthorsIndex += authorTag.length(); } int delimiterIndex = input.lastIndexOf("//"); - if 
(delimiterIndex != -1){ + if (delimiterIndex != -1) { lastParts.add(input.substring(afterAuthorsIndex, delimiterIndex) - .replace(yearTag, "") - .replace(pagesTag, "")); + .replace(yearTag, "") + .replace(pagesTag, "")); lastParts.addAll(Arrays.asList(input.substring(delimiterIndex + 2).split(",|\\."))); } @@ -159,21 +154,21 @@ private String findParts(String input) lastParts.addAll(Arrays.asList(input.substring(afterAuthorsIndex).split(",|\\."))); } int nonDigitParts = 0; - for (String part: lastParts) { - if (part.matches(".*\\d.*")){ + for (String part : lastParts) { + if (part.matches(".*\\d.*")) { break; } nonDigitParts++; } - if (nonDigitParts > 0){ + if (nonDigitParts > 0) { title = lastParts.get(0); line.replace(title, titleTag); } - if (nonDigitParts > 1){ + if (nonDigitParts > 1) { journalOrPublisher = lastParts.get(1); line.replace(journalOrPublisher, journalTag); } - if (nonDigitParts > 2){ + if (nonDigitParts > 2) { isArticle = false; } return fixSpaces(line); diff --git a/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java new file mode 100644 index 00000000000..4fbd8f04bb6 --- /dev/null +++ b/src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java @@ -0,0 +1,44 @@ +package org.jabref.gui.bibtexextractor; + +import java.util.HashMap; +import java.util.Map; + +import javafx.beans.property.SimpleStringProperty; +import javafx.beans.property.StringProperty; + +import org.jabref.Globals; +import org.jabref.model.database.BibDatabaseContext; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.EntryType; +import org.jabref.model.entry.StandardEntryType; + +public class BibtexExtractorViewModel { + + private final StringProperty inputTextProperty = new SimpleStringProperty(""); + private final BibDatabaseContext bibdatabaseContext; + + + public BibtexExtractorViewModel(BibDatabaseContext bibdatabaseContext) { + 
this.bibdatabaseContext = bibdatabaseContext; + } + + public StringProperty inputTextProperty() { + return this.inputTextProperty; + } + + public void startExtraction() { + + BibtexExtractor extractor = new BibtexExtractor(); + BibEntry entity = extractor.extract(inputTextProperty.getValue()); + this.bibdatabaseContext.getDatabase().insertEntry(entity); + trackNewEntry(StandardEntryType.Article); + + } + + private void trackNewEntry(EntryType type) { + Map properties = new HashMap<>(); + properties.put("EntryType", type.getName()); + + Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("NewEntry", properties, new HashMap<>())); + } +} diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java index 9aca54c4e0c..7610a6d7fa9 100644 --- a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexAction.java @@ -1,19 +1,19 @@ package org.jabref.gui.bibtexextractor; -import org.jabref.gui.JabRefFrame; +import org.jabref.gui.StateManager; import org.jabref.gui.actions.SimpleCommand; -public class ExtractBibtexAction extends SimpleCommand { +import static org.jabref.gui.actions.ActionHelper.needsDatabase; - private final JabRefFrame jabRefFrame; +public class ExtractBibtexAction extends SimpleCommand { - public ExtractBibtexAction(JabRefFrame jabRefFrame) { - this.jabRefFrame = jabRefFrame; + public ExtractBibtexAction(StateManager stateManager) { + this.executable.bind(needsDatabase(stateManager)); } @Override public void execute() { - ExtractBibtexDialog dlg = new ExtractBibtexDialog(jabRefFrame); + ExtractBibtexDialog dlg = new ExtractBibtexDialog(); dlg.showAndWait(); } } diff --git a/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.fxml b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.fxml new file mode 100644 index 00000000000..2929d018527 --- 
/dev/null +++ b/src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.fxml @@ -0,0 +1,15 @@ + + + + + + + + + + +