Skip to content

Commit 5fa1dcf

Browse files
NikodemKch, joeyzgraggen, guenesaydin, obsluk00
authored
Add option to parse new references from plain text using GROBID… (#5614)
GROBID integration using new fetcher Add the possibility to extract references from plain text using the GROBID service. GROBID is called over a custom server. See also pull request #5614 Co-Authored-By: joeyzgraggen <joeyzgraggen@users.noreply.github.com> Co-Authored-By: guenesaydin <guenesaydin@users.noreply.github.com> Co-Authored-By: obsluk00 <obsluk00@users.noreply.github.com> Co-Authored-By: nikodemkch <nikodemkch@users.noreply.github.com>
1 parent e0e837e commit 5fa1dcf

14 files changed

+357
-41
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
1313

1414
### Changed
1515

16+
- We reintroduced the possibility to extract references from plain text (using GROBID) [#5614](https://github.com/JabRef/jabref/pull/5614)
1617
- We changed the open office panel to show buttons in rows of three instead of going straight down to save space as the button expanded out to take up unnecessary horizontal space. [#5479](https://github.com/JabRef/jabref/issues/5479)
1718
- We cleaned up the group add/edit dialog. [#5826](https://github.com/JabRef/jabref/pull/5826)
1819
- We reintroduced the index column. [#5844](https://github.com/JabRef/jabref/pull/5844)

src/main/java/org/jabref/gui/JabRefFrame.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ private Node createToolbar() {
481481
HBox rightSide = new HBox(
482482
factory.createIconButton(StandardActions.NEW_ARTICLE, new NewEntryAction(this, StandardEntryType.Article, dialogService, Globals.prefs, stateManager)),
483483
factory.createIconButton(StandardActions.NEW_ENTRY, new NewEntryAction(this, dialogService, Globals.prefs, stateManager)),
484+
factory.createIconButton(StandardActions.NEW_ENTRY_FROM_PLAIN_TEXT, new ExtractBibtexAction(stateManager)),
484485
factory.createIconButton(StandardActions.DELETE_ENTRY, new OldDatabaseCommandWrapper(Actions.DELETE, this, stateManager)),
485486
new Separator(Orientation.VERTICAL),
486487
factory.createIconButton(StandardActions.UNDO, new OldDatabaseCommandWrapper(Actions.UNDO, this, stateManager)),
@@ -729,6 +730,7 @@ private MenuBar createMenu() {
729730
//@formatter:off
730731
library.getItems().addAll(
731732
factory.createMenuItem(StandardActions.NEW_ENTRY, new NewEntryAction(this, dialogService, Globals.prefs, stateManager)),
733+
factory.createMenuItem(StandardActions.NEW_ENTRY_FROM_PLAIN_TEXT, new ExtractBibtexAction(stateManager)),
732734
factory.createMenuItem(StandardActions.DELETE_ENTRY, new OldDatabaseCommandWrapper(Actions.DELETE, this, stateManager)),
733735

734736
new SeparatorMenuItem(),
@@ -768,7 +770,6 @@ private MenuBar createMenu() {
768770
factory.createMenuItem(StandardActions.FIND_UNLINKED_FILES, new FindUnlinkedFilesAction(this, stateManager)),
769771
factory.createMenuItem(StandardActions.WRITE_XMP, new OldDatabaseCommandWrapper(Actions.WRITE_XMP, this, stateManager)),
770772
factory.createMenuItem(StandardActions.COPY_LINKED_FILES, new CopyFilesAction(stateManager, this.getDialogService())),
771-
factory.createMenuItem(StandardActions.EXTRACT_BIBTEX, new ExtractBibtexAction(stateManager)),
772773

773774
new SeparatorMenuItem(),
774775

src/main/java/org/jabref/gui/actions/StandardActions.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ public enum StandardActions implements Action {
120120

121121
NEW_ENTRY(Localization.lang("New entry"), IconTheme.JabRefIcons.ADD_ENTRY, KeyBinding.NEW_ENTRY),
122122
NEW_ARTICLE(Localization.lang("New article"), IconTheme.JabRefIcons.ADD_ARTICLE),
123-
NEW_ENTRY_FROM_PLAINTEX(Localization.lang("New entry from plain text"), KeyBinding.NEW_FROM_PLAIN_TEXT),
123+
NEW_ENTRY_FROM_PLAIN_TEXT(Localization.lang("New entry from plain text"), IconTheme.JabRefIcons.NEW_ENTRY_FROM_PLAIN_TEXT, KeyBinding.NEW_ENTRY_FROM_PLAIN_TEXT),
124124
LIBRARY_PROPERTIES(Localization.lang("Library properties")),
125125
EDIT_PREAMBLE(Localization.lang("Edit preamble")),
126126
EDIT_STRINGS(Localization.lang("Edit string constants"), IconTheme.JabRefIcons.EDIT_STRINGS, KeyBinding.EDIT_STRINGS),
@@ -138,7 +138,6 @@ public enum StandardActions implements Action {
138138
DOWNLOAD_FULL_TEXT(Localization.lang("Search full text documents online"), IconTheme.JabRefIcons.FILE_SEARCH, KeyBinding.DOWNLOAD_FULL_TEXT),
139139
CLEANUP_ENTRIES(Localization.lang("Cleanup entries"), IconTheme.JabRefIcons.CLEANUP_ENTRIES, KeyBinding.CLEANUP),
140140
SET_FILE_LINKS(Localization.lang("Automatically set file links"), KeyBinding.AUTOMATICALLY_LINK_FILES),
141-
EXTRACT_BIBTEX(Localization.lang("Extract BibTeX from plain text")),
142141

143142
HELP(Localization.lang("Online help"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP),
144143
HELP_KEY_PATTERNS(Localization.lang("Help on key patterns"), IconTheme.JabRefIcons.HELP, KeyBinding.HELP),

src/main/java/org/jabref/gui/bibtexextractor/BibtexExtractorViewModel.java

+36-16
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,60 @@
33
import java.util.HashMap;
44
import java.util.Map;
55

6+
import javax.swing.undo.UndoManager;
7+
68
import javafx.beans.property.SimpleStringProperty;
79
import javafx.beans.property.StringProperty;
810

911
import org.jabref.Globals;
12+
import org.jabref.gui.DialogService;
13+
import org.jabref.gui.StateManager;
14+
import org.jabref.gui.externalfiles.ImportHandler;
15+
import org.jabref.gui.externalfiletype.ExternalFileTypes;
16+
import org.jabref.gui.util.BackgroundTask;
17+
import org.jabref.gui.util.TaskExecutor;
18+
import org.jabref.logic.importer.fetcher.GrobidCitationFetcher;
19+
import org.jabref.logic.l10n.Localization;
1020
import org.jabref.model.database.BibDatabaseContext;
1121
import org.jabref.model.entry.BibEntry;
12-
import org.jabref.model.entry.types.EntryType;
13-
import org.jabref.model.entry.types.StandardEntryType;
22+
import org.jabref.model.util.FileUpdateMonitor;
23+
import org.jabref.preferences.JabRefPreferences;
1424

1525
public class BibtexExtractorViewModel {
1626

1727
private final StringProperty inputTextProperty = new SimpleStringProperty("");
18-
private final BibDatabaseContext bibdatabaseContext;
19-
20-
public BibtexExtractorViewModel(BibDatabaseContext bibdatabaseContext) {
21-
this.bibdatabaseContext = bibdatabaseContext;
28+
private DialogService dialogService;
29+
private GrobidCitationFetcher currentCitationfetcher;
30+
private TaskExecutor taskExecutor;
31+
private ImportHandler importHandler;
32+
33+
public BibtexExtractorViewModel(BibDatabaseContext bibdatabaseContext, DialogService dialogService,
34+
JabRefPreferences jabRefPreferences, FileUpdateMonitor fileUpdateMonitor, TaskExecutor taskExecutor, UndoManager undoManager, StateManager stateManager) {
35+
this.dialogService = dialogService;
36+
currentCitationfetcher = new GrobidCitationFetcher(jabRefPreferences.getImportFormatPreferences());
37+
this.taskExecutor = taskExecutor;
38+
this.importHandler = new ImportHandler(dialogService, bibdatabaseContext, ExternalFileTypes.getInstance(), jabRefPreferences.getFilePreferences(), jabRefPreferences.getImportFormatPreferences(), jabRefPreferences.getUpdateFieldPreferences(), fileUpdateMonitor, undoManager, stateManager);
2239
}
2340

2441
public StringProperty inputTextProperty() {
2542
return this.inputTextProperty;
2643
}
2744

28-
public void startExtraction() {
29-
30-
BibtexExtractor extractor = new BibtexExtractor();
31-
BibEntry entity = extractor.extract(inputTextProperty.getValue());
32-
this.bibdatabaseContext.getDatabase().insertEntry(entity);
33-
trackNewEntry(StandardEntryType.Article);
45+
public void startParsing() {
46+
BackgroundTask.wrap(() -> currentCitationfetcher.performSearch(inputTextProperty.getValue()))
47+
.onRunning(() -> dialogService.notify(Localization.lang("Your text is being parsed...")))
48+
.onSuccess(parsedEntries -> {
49+
dialogService.notify(Localization.lang("%0 entries were parsed from your query.", String.valueOf(parsedEntries.size())));
50+
importHandler.importEntries(parsedEntries);
51+
for (BibEntry bibEntry : parsedEntries) {
52+
trackNewEntry(bibEntry);
53+
}
54+
}).executeWith(taskExecutor);
3455
}
3556

36-
private void trackNewEntry(EntryType type) {
57+
private void trackNewEntry(BibEntry bibEntry) {
3758
Map<String, String> properties = new HashMap<>();
38-
properties.put("EntryType", type.getName());
39-
40-
Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("NewEntry", properties, new HashMap<>()));
59+
properties.put("EntryType", bibEntry.typeProperty().getValue().getName());
60+
Globals.getTelemetryClient().ifPresent(client -> client.trackEvent("ParseWithGrobid", properties, new HashMap<>()));
4161
}
4262
}

src/main/java/org/jabref/gui/bibtexextractor/ExtractBibtexDialog.fxml

+8-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@
44
<?import javafx.scene.control.DialogPane?>
55
<?import javafx.scene.control.TextArea?>
66

7+
<?import javafx.scene.layout.VBox?>
8+
79
<DialogPane prefHeight="430.0" prefWidth="586.0" xmlns="http://javafx.com/javafx/8.0.171"
810
xmlns:fx="http://javafx.com/fxml/1" fx:controller="org.jabref.gui.bibtexextractor.ExtractBibtexDialog">
911
<content>
10-
<TextArea fx:id="input" minHeight="-Infinity" prefHeight="350.0" prefWidth="586.0"/>
12+
<VBox fx:id="contentVbox" minHeight="-Infinity" prefHeight="200.0" prefWidth="100.0">
13+
<children>
14+
<TextArea fx:id="input" minHeight="-Infinity" prefHeight="350.0" prefWidth="586.0" wrapText="true"/>
15+
</children>
16+
</VBox>
1117
</content>
12-
<ButtonType fx:id="extractButtonType" buttonData="OK_DONE" text="%Extract"/>
18+
<ButtonType fx:id="parseButtonType" buttonData="OK_DONE" text = "%Add to current library"/>
1319
<ButtonType fx:constant="CANCEL"/>
1420
</DialogPane>
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
package org.jabref.gui.bibtexextractor;
22

33
import javax.inject.Inject;
4+
import javax.swing.undo.UndoManager;
45

56
import javafx.fxml.FXML;
67
import javafx.scene.control.Button;
78
import javafx.scene.control.ButtonType;
89
import javafx.scene.control.TextArea;
910
import javafx.scene.control.Tooltip;
1011

12+
import org.jabref.gui.DialogService;
1113
import org.jabref.gui.StateManager;
1214
import org.jabref.gui.util.BaseDialog;
15+
import org.jabref.gui.util.TaskExecutor;
1316
import org.jabref.logic.l10n.Localization;
1417
import org.jabref.model.database.BibDatabaseContext;
18+
import org.jabref.model.util.FileUpdateMonitor;
19+
import org.jabref.preferences.JabRefPreferences;
1520

1621
import com.airhacks.afterburner.views.ViewLoader;
1722

@@ -20,31 +25,34 @@
2025
*/
2126
public class ExtractBibtexDialog extends BaseDialog<Void> {
2227

23-
private final Button buttonExtract;
28+
private final Button buttonParse;
2429
@FXML private TextArea input;
25-
@FXML private ButtonType extractButtonType;
30+
@FXML private ButtonType parseButtonType;
2631
private BibtexExtractorViewModel viewModel;
27-
2832
@Inject private StateManager stateManager;
33+
@Inject private DialogService dialogService;
34+
@Inject private FileUpdateMonitor fileUpdateMonitor;
35+
@Inject private TaskExecutor taskExecutor;
36+
@Inject private UndoManager undoManager;
2937

3038
public ExtractBibtexDialog() {
31-
3239
ViewLoader.view(this)
3340
.load()
3441
.setAsDialogPane(this);
35-
36-
this.setTitle(Localization.lang("Input text to parse"));
37-
buttonExtract = (Button) getDialogPane().lookupButton(extractButtonType);
38-
buttonExtract.setTooltip(new Tooltip((Localization.lang("Starts the extraction of the BibTeX entry"))));
39-
buttonExtract.setOnAction(e -> viewModel.startExtraction());
40-
buttonExtract.disableProperty().bind(viewModel.inputTextProperty().isEmpty());
42+
this.setTitle(Localization.lang("Plain References Parser"));
43+
input.setPromptText(Localization.lang("Please enter the plain references to extract from separated by double empty lines."));
44+
input.selectAll();
45+
46+
buttonParse = (Button) getDialogPane().lookupButton(parseButtonType);
47+
buttonParse.setTooltip(new Tooltip((Localization.lang("Starts the extraction and adds the resulting entries to the currently opened database"))));
48+
buttonParse.setOnAction(event -> viewModel.startParsing());
49+
buttonParse.disableProperty().bind(viewModel.inputTextProperty().isEmpty());
4150
}
4251

4352
@FXML
4453
private void initialize() {
4554
BibDatabaseContext database = stateManager.getActiveDatabase().orElseThrow(() -> new NullPointerException("Database null"));
46-
this.viewModel = new BibtexExtractorViewModel(database);
47-
55+
this.viewModel = new BibtexExtractorViewModel(database, dialogService, JabRefPreferences.getInstance(), fileUpdateMonitor, taskExecutor,undoManager,stateManager);
4856
input.textProperty().bindBidirectional(viewModel.inputTextProperty());
4957
}
5058
}

src/main/java/org/jabref/gui/icon/IconTheme.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,8 @@ public enum JabRefIcons implements JabRefIcon {
298298
OPEN_ABBREVIATION_LIST(MaterialDesignIcon.FOLDER_OUTLINE),
299299
REMOVE_ABBREVIATION_LIST(MaterialDesignIcon.FOLDER_REMOVE),
300300
ADD_ABBREVIATION(MaterialDesignIcon.PLAYLIST_PLUS),
301-
REMOVE_ABBREVIATION(MaterialDesignIcon.PLAYLIST_MINUS);
301+
REMOVE_ABBREVIATION(MaterialDesignIcon.PLAYLIST_MINUS),
302+
NEW_ENTRY_FROM_PLAIN_TEXT(MaterialDesignIcon.PLUS_BOX);
302303

303304
private final JabRefIcon icon;
304305

src/main/java/org/jabref/gui/keyboard/KeyBinding.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public enum KeyBinding {
4848
NEW_ARTICLE("New article", Localization.lang("New article"), "ctrl+shift+A", KeyBindingCategory.BIBTEX),
4949
NEW_BOOK("New book", Localization.lang("New book"), "ctrl+shift+B", KeyBindingCategory.BIBTEX),
5050
NEW_ENTRY("New entry", Localization.lang("New entry"), "ctrl+N", KeyBindingCategory.BIBTEX),
51-
NEW_FROM_PLAIN_TEXT("New from plain text", Localization.lang("New from plain text"), "ctrl+shift+N", KeyBindingCategory.BIBTEX),
51+
NEW_ENTRY_FROM_PLAIN_TEXT("New entry from plain text", Localization.lang("New entry from plain text"), "ctrl+shift+N", KeyBindingCategory.BIBTEX),
5252
NEW_INBOOK("New inbook", Localization.lang("New inbook"), "ctrl+shift+I", KeyBindingCategory.BIBTEX),
5353
NEW_MASTERSTHESIS("New mastersthesis", Localization.lang("New mastersthesis"), "ctrl+shift+M", KeyBindingCategory.BIBTEX),
5454
NEW_PHDTHESIS("New phdthesis", Localization.lang("New phdthesis"), "ctrl+shift+T", KeyBindingCategory.BIBTEX),

src/main/java/org/jabref/logic/importer/WebFetchers.java

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.jabref.logic.importer.fetcher.DoiFetcher;
1919
import org.jabref.logic.importer.fetcher.DoiResolution;
2020
import org.jabref.logic.importer.fetcher.GoogleScholar;
21+
import org.jabref.logic.importer.fetcher.GrobidCitationFetcher;
2122
import org.jabref.logic.importer.fetcher.GvkFetcher;
2223
import org.jabref.logic.importer.fetcher.IEEE;
2324
import org.jabref.logic.importer.fetcher.INSPIREFetcher;
@@ -98,6 +99,7 @@ public static SortedSet<SearchBasedFetcher> getSearchBasedFetchers(ImportFormatP
9899
set.add(new CiteSeer());
99100
set.add(new DOAJFetcher(importFormatPreferences));
100101
set.add(new IEEE(importFormatPreferences));
102+
set.add(new GrobidCitationFetcher(importFormatPreferences));
101103
return set;
102104
}
103105

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.io.IOException;
4+
import java.util.ArrayList;
5+
import java.util.Arrays;
6+
import java.util.Collections;
7+
import java.util.List;
8+
import java.util.Optional;
9+
import java.util.stream.Collectors;
10+
11+
import org.jabref.logic.importer.ImportFormatPreferences;
12+
import org.jabref.logic.importer.ParseException;
13+
import org.jabref.logic.importer.SearchBasedFetcher;
14+
import org.jabref.logic.importer.fileformat.BibtexParser;
15+
import org.jabref.logic.importer.util.GrobidService;
16+
import org.jabref.model.entry.BibEntry;
17+
import org.jabref.model.util.DummyFileUpdateMonitor;
18+
19+
import org.slf4j.Logger;
20+
import org.slf4j.LoggerFactory;
21+
22+
public class GrobidCitationFetcher implements SearchBasedFetcher {
23+
24+
private static final Logger LOGGER = LoggerFactory.getLogger(GrobidCitationFetcher.class);
25+
private static final String GROBID_URL = "http://grobid.cm.in.tum.de:8070";
26+
private ImportFormatPreferences importFormatPreferences;
27+
private GrobidService grobidService;
28+
29+
public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) {
30+
this.importFormatPreferences = importFormatPreferences;
31+
this.grobidService = new GrobidService(GROBID_URL);
32+
}
33+
34+
/**
35+
* Passes request to grobid server, using consolidateCitations option to improve result.
36+
* Takes a while, since the server has to look up the entry.
37+
* @return A BibTeX-String if extraction is successful and an empty String otherwise.
38+
*/
39+
private String parseUsingGrobid(String plainText) {
40+
try {
41+
return grobidService.processCitation(plainText, GrobidService.ConsolidateCitations.WITH_METADATA);
42+
} catch (IOException e) {
43+
LOGGER.debug("Could not process citation", e);
44+
return "";
45+
}
46+
}
47+
48+
private Optional<BibEntry> parseBibToBibEntry(String bibtexString) {
49+
try {
50+
return BibtexParser.singleFromString(bibtexString,
51+
importFormatPreferences, new DummyFileUpdateMonitor());
52+
} catch (ParseException e) {
53+
return Optional.empty();
54+
}
55+
}
56+
57+
@Override
58+
public List<BibEntry> performSearch(String query) {
59+
List<String> plainReferences = Arrays.stream( query.split( "\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+" ) )
60+
.map(String::trim)
61+
.filter(str -> !str.isBlank())
62+
.collect(Collectors.toCollection(ArrayList::new));
63+
if (plainReferences.isEmpty()) {
64+
return Collections.emptyList();
65+
} else {
66+
return plainReferences.stream()
67+
.map(reference -> parseBibToBibEntry(parseUsingGrobid(reference)))
68+
.flatMap(Optional::stream)
69+
.collect(Collectors.toList());
70+
}
71+
}
72+
73+
@Override
74+
public String getName() {
75+
return "GROBID";
76+
}
77+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package org.jabref.logic.importer.util;
2+
3+
import java.io.IOException;
4+
import java.net.URLEncoder;
5+
import java.nio.charset.StandardCharsets;
6+
7+
import org.jabref.logic.net.URLDownload;
8+
9+
/**
10+
* Implements an API to a GROBID server, as described at
11+
* https://grobid.readthedocs.io/en/latest/Grobid-service/#grobid-web-services
12+
* <p>
13+
* Note: Currently a custom GROBID server is used...
14+
* https://github.com/NikodemKch/grobid
15+
* <p>
16+
* The methods are structured to match the GROBID server api.
17+
* Each method corresponds to a GROBID service request. Only the ones already used are already implemented.
18+
*/
19+
public class GrobidService {
20+
21+
public enum ConsolidateCitations {
22+
NO(0), WITH_METADATA(1), WITH_DOI_ONLY(2);
23+
private int code;
24+
25+
ConsolidateCitations(int code) {
26+
this.code = code;
27+
}
28+
29+
public int getCode() {
30+
return this.code;
31+
}
32+
}
33+
34+
String grobidServerURL;
35+
36+
public GrobidService(String grobidServerURL) {
37+
this.grobidServerURL = grobidServerURL;
38+
}
39+
40+
/**
41+
* @return A BibTeX-String if extraction is successful and an IOException otherwise.
42+
*/
43+
public String processCitation(String rawCitation, ConsolidateCitations consolidateCitations) throws IOException {
44+
rawCitation = URLEncoder.encode(rawCitation, StandardCharsets.UTF_8);
45+
URLDownload urlDownload = new URLDownload(grobidServerURL
46+
+ "/api/processCitation");
47+
//urlDownload.addHeader("Accept", "application/x-bibtex"); //TODO: Uncomment as soon as the default GROBID server is used.
48+
urlDownload.setPostData("citations=" + rawCitation + "&consolidateCitations=" + consolidateCitations);
49+
String httpResponse = urlDownload.asString();
50+
51+
if (httpResponse == null || httpResponse.equals("@misc{-1,\n\n}\n")) { //This filters empty BibTeX entries
52+
throw new IOException("The GROBID server response does not contain anything.");
53+
}
54+
55+
return httpResponse;
56+
}
57+
}

0 commit comments

Comments
 (0)