diff --git a/CHANGELOG.md b/CHANGELOG.md index 584711a81be..72c639ea0e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,7 +56,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - The `Move linked files to default file directory`-Cleanup operation respects the `File directory pattern` setting - We separated the `Move file` and `Rename Pdfs` logic and context menu entries in the `General`-Tab for the Field `file` to improve the semantics - A scrollbar was added to the cleanup panel, as a result of issue [#2501](https://github.com/JabRef/jabref/issues/2501) -- Using "Look up document identifier" in the quality menu, it is possible to look up DOIs and other identifiers for multiple entries. +- Using "Look up document identifier" in the quality menu, it is possible to look up DOIs, ArXiv ids and other identifiers for multiple entries. - F4 opens selected file in current JTable context not just from selected entry inside the main table [#2355](https://github.com/JabRef/jabref/issues/2355) - We added an option to copy the title of BibTeX entries to the clipboard through `Edit -> Copy title` (implements [#210](https://github.com/koppor/jabref/issues/210)) - Several scrollbars were added to the preference dialog which show up when content is too large [#2559](https://github.com/JabRef/jabref/issues/2559) @@ -66,6 +66,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - The `day` part of the biblatex `date` field is now exported to the corresponding `day` field in MS-Office XML. [#2691](https://github.com/JabRef/jabref/issues/2691) ### Fixed + - We fixed an issue of duplicate keys after using a fetcher, e.g., DOI or ISBN [#2687](https://github.com/JabRef/jabref/issues/2687) + - We fixed an issue that prevented multiple parallel JabRef instances from terminating gracefully. 
[#2698](https://github.com/JabRef/jabref/issues/2698) - We fixed an issue where authors with multiple surnames were not presented correctly in the main table. [#2534](https://github.com/JabRef/jabref/issues/2534) - Repairs the handling of apostrophes in the LaTeX to unicode conversion. [#2500](https://github.com/JabRef/jabref/issues/2500) - Fix import of journal title in ris format. [#2506](https://github.com/JabRef/jabref/issues/2506) diff --git a/build.gradle b/build.gradle index 4784f4f2bb3..733451e753c 100644 --- a/build.gradle +++ b/build.gradle @@ -140,7 +140,7 @@ dependencies { compile 'com.github.tomtung:latex2unicode_2.12:0.2' testCompile 'junit:junit:4.12' - testCompile 'org.mockito:mockito-core:2.7.19' + testCompile 'org.mockito:mockito-core:2.7.21' testCompile 'com.github.tomakehurst:wiremock:2.5.1' testCompile 'org.assertj:assertj-swing-junit:3.5.0' testCompile 'org.reflections:reflections:0.9.11' diff --git a/src/main/java/org/jabref/JabRefMain.java b/src/main/java/org/jabref/JabRefMain.java index e3504a9fd45..cf7b1440b24 100644 --- a/src/main/java/org/jabref/JabRefMain.java +++ b/src/main/java/org/jabref/JabRefMain.java @@ -116,6 +116,8 @@ private static void start(String[] args) { // So we assume it's all taken care of, and quit. LOGGER.info(Localization.lang("Arguments passed on to running JabRef instance. 
Shutting down.")); Globals.shutdownThreadPools(); + // needed to tell JavaFx to stop + Platform.exit(); return; } } diff --git a/src/main/java/org/jabref/collab/ChangeScanner.java b/src/main/java/org/jabref/collab/ChangeScanner.java index 9568c6481d6..9c96bd27941 100644 --- a/src/main/java/org/jabref/collab/ChangeScanner.java +++ b/src/main/java/org/jabref/collab/ChangeScanner.java @@ -17,6 +17,7 @@ import org.jabref.JabRefExecutorService; import org.jabref.gui.BasePanel; import org.jabref.gui.JabRefFrame; +import org.jabref.logic.bibtex.DuplicateCheck; import org.jabref.logic.bibtex.comparator.EntryComparator; import org.jabref.logic.exporter.BibDatabaseWriter; import org.jabref.logic.exporter.BibtexDatabaseWriter; @@ -29,7 +30,6 @@ import org.jabref.logic.importer.ParserResult; import org.jabref.logic.l10n.Localization; import org.jabref.model.Defaults; -import org.jabref.model.DuplicateCheck; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.database.EntrySorter; diff --git a/src/main/java/org/jabref/collab/EntryChange.java b/src/main/java/org/jabref/collab/EntryChange.java index 1ea9498f00f..eaeb5d111ec 100644 --- a/src/main/java/org/jabref/collab/EntryChange.java +++ b/src/main/java/org/jabref/collab/EntryChange.java @@ -13,8 +13,8 @@ import org.jabref.gui.BasePanel; import org.jabref.gui.undo.NamedCompound; import org.jabref.gui.undo.UndoableFieldChange; +import org.jabref.logic.bibtex.DuplicateCheck; import org.jabref.logic.l10n.Localization; -import org.jabref.model.DuplicateCheck; import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; diff --git a/src/main/java/org/jabref/collab/EntryDeleteChange.java b/src/main/java/org/jabref/collab/EntryDeleteChange.java index 3d474262f3b..b9b128dc6bb 100644 --- a/src/main/java/org/jabref/collab/EntryDeleteChange.java +++ b/src/main/java/org/jabref/collab/EntryDeleteChange.java @@ -7,8 +7,8 @@ import 
org.jabref.gui.PreviewPanel; import org.jabref.gui.undo.NamedCompound; import org.jabref.gui.undo.UndoableRemoveEntry; +import org.jabref.logic.bibtex.DuplicateCheck; import org.jabref.logic.l10n.Localization; -import org.jabref.model.DuplicateCheck; import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; diff --git a/src/main/java/org/jabref/gui/DuplicateSearch.java b/src/main/java/org/jabref/gui/DuplicateSearch.java index 532c32c4a8d..28a8795dfbd 100644 --- a/src/main/java/org/jabref/gui/DuplicateSearch.java +++ b/src/main/java/org/jabref/gui/DuplicateSearch.java @@ -14,8 +14,8 @@ import org.jabref.gui.undo.UndoableInsertEntry; import org.jabref.gui.undo.UndoableRemoveEntry; import org.jabref.gui.worker.CallBack; +import org.jabref.logic.bibtex.DuplicateCheck; import org.jabref.logic.l10n.Localization; -import org.jabref.model.DuplicateCheck; import org.jabref.model.entry.BibEntry; import spin.Spin; diff --git a/src/main/java/org/jabref/gui/EntryTypeDialog.java b/src/main/java/org/jabref/gui/EntryTypeDialog.java index ef4c51a467a..159e88b11f8 100644 --- a/src/main/java/org/jabref/gui/EntryTypeDialog.java +++ b/src/main/java/org/jabref/gui/EntryTypeDialog.java @@ -28,6 +28,7 @@ import org.jabref.Globals; import org.jabref.gui.keyboard.KeyBinding; +import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.IdBasedFetcher; import org.jabref.logic.importer.WebFetchers; @@ -96,7 +97,6 @@ private JPanel createEntryGroupsPanel() { if (!customTypes.isEmpty()) { panel.add(createEntryGroupPanel(Localization.lang("Custom"), customTypes)); } - } else { panel.add(createEntryGroupPanel("BibTeX", BibtexEntryTypes.ALL)); panel.add(createEntryGroupPanel("IEEETran", IEEETranEntryTypes.ALL)); @@ -182,7 +182,7 @@ private JPanel createIdFetcherPanel() { JPanel jPanel = new JPanel(); GridBagConstraints constraints = new GridBagConstraints(); - constraints.insets = new 
Insets(4,4,4,4); + constraints.insets = new Insets(4, 4, 4, 4); GridBagLayout layout = new GridBagLayout(); jPanel.setLayout(layout); @@ -305,15 +305,19 @@ protected void done() { try { Optional result = get(); if (result.isPresent()) { - frame.getCurrentBasePanel().insertEntry(result.get()); + final BibEntry bibEntry = result.get(); + // Regenerate CiteKey of imported BibEntry + BibtexKeyPatternUtil.makeAndSetLabel(Globals.prefs.getBibtexKeyPatternPreferences().getKeyPattern(), frame.getCurrentBasePanel().getDatabase(), bibEntry, Globals.prefs.getBibtexKeyPatternPreferences()); + + frame.getCurrentBasePanel().insertEntry(bibEntry); dispose(); } else if (searchID.trim().isEmpty()) { JOptionPane.showMessageDialog(frame, Localization.lang("The given search ID was empty."), Localization.lang("Empty search ID"), JOptionPane.WARNING_MESSAGE); } else if (!fetcherException) { - JOptionPane.showMessageDialog(frame, Localization.lang("Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.", fetcher.getName(), searchID)+ "\n" + fetcherExceptionMessage, Localization.lang("No files found."), JOptionPane.WARNING_MESSAGE); + JOptionPane.showMessageDialog(frame, Localization.lang("Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.", fetcher.getName(), searchID) + "\n" + fetcherExceptionMessage, Localization.lang("No files found."), JOptionPane.WARNING_MESSAGE); } else { JOptionPane.showMessageDialog(frame, - Localization.lang("Error while fetching from %0", fetcher.getName()) +"." + "\n" + fetcherExceptionMessage, + Localization.lang("Error while fetching from %0", fetcher.getName()) + "." 
+ "\n" + fetcherExceptionMessage, Localization.lang("Error"), JOptionPane.ERROR_MESSAGE); } fetcherWorker = new FetcherWorker(); @@ -328,5 +332,4 @@ protected void done() { } } } - } diff --git a/src/main/java/org/jabref/gui/FindUnlinkedFilesDialog.java b/src/main/java/org/jabref/gui/FindUnlinkedFilesDialog.java index 8020ea8c2ca..2f146314fe7 100644 --- a/src/main/java/org/jabref/gui/FindUnlinkedFilesDialog.java +++ b/src/main/java/org/jabref/gui/FindUnlinkedFilesDialog.java @@ -87,10 +87,6 @@ /** * GUI Dialog for the feature "Find unlinked files". - * - * @author Nosh&Dan - * @version 25.11.2008 | 23:13:29 - * */ public class FindUnlinkedFilesDialog extends JDialog { private static final Log LOGGER = LogFactory.getLog(FindUnlinkedFilesDialog.class); @@ -130,7 +126,7 @@ public class FindUnlinkedFilesDialog extends JDialog { private JButton buttonClose; /* Options for the TreeView */ private JButton buttonOptionSelectAll; - private JButton buttonOptionUnselectAll; + private JButton buttonOptionDeselectAll; private JButton buttonOptionExpandAll; private JButton buttonOptionCollapseAll; @@ -711,9 +707,9 @@ public void windowClosing(WindowEvent e) { buttonOptionSelectAll = new JButton(); buttonOptionSelectAll.setMnemonic('A'); buttonOptionSelectAll.setAction(actionSelectAll); - buttonOptionUnselectAll = new JButton(); - buttonOptionUnselectAll.setMnemonic('U'); - buttonOptionUnselectAll.setAction(actionUnselectAll); + buttonOptionDeselectAll = new JButton(); + buttonOptionDeselectAll.setMnemonic('U'); + buttonOptionDeselectAll.setAction(actionUnselectAll); buttonOptionExpandAll = new JButton(); buttonOptionExpandAll.setMnemonic('E'); buttonOptionExpandAll.setAction(actionExpandTree); @@ -809,7 +805,7 @@ GridBagConstraints.HORIZONTAL, GridBagConstraints.WEST, new Insets(18, 3, 18, 6) GridBagConstraints.NORTHEAST, basicInsets, 1, 1, 1, 1, 0, 0, 0, 0); FindUnlinkedFilesDialog.addComponent(gbl, panelOptions, buttonOptionSelectAll, GridBagConstraints.HORIZONTAL, 
GridBagConstraints.NORTH, noInsets, 0, 0, 1, 1, 1, 0, 0, 0); - FindUnlinkedFilesDialog.addComponent(gbl, panelOptions, buttonOptionUnselectAll, GridBagConstraints.HORIZONTAL, + FindUnlinkedFilesDialog.addComponent(gbl, panelOptions, buttonOptionDeselectAll, GridBagConstraints.HORIZONTAL, GridBagConstraints.NORTH, noInsets, 0, 1, 1, 1, 0, 0, 0, 0); FindUnlinkedFilesDialog.addComponent(gbl, panelOptions, buttonOptionExpandAll, GridBagConstraints.HORIZONTAL, GridBagConstraints.NORTH, new Insets(6, 0, 0, 0), 0, 2, 1, 1, 0, 0, 0, 0); diff --git a/src/main/java/org/jabref/gui/JabRefFrame.java b/src/main/java/org/jabref/gui/JabRefFrame.java index 998c692360a..95494891112 100644 --- a/src/main/java/org/jabref/gui/JabRefFrame.java +++ b/src/main/java/org/jabref/gui/JabRefFrame.java @@ -1177,7 +1177,7 @@ private void fillMenu() { quality.add(findUnlinkedFiles); quality.add(autoLinkFile); - for (IdFetcher fetcher : WebFetchers.getIdFetchers()) { + for (IdFetcher fetcher : WebFetchers.getIdFetchers(Globals.prefs.getImportFormatPreferences())) { lookupIdentifiers.add(new LookupIdentifierAction(this, fetcher)); } quality.add(lookupIdentifiers); diff --git a/src/main/java/org/jabref/gui/importer/ImportInspectionDialog.java b/src/main/java/org/jabref/gui/importer/ImportInspectionDialog.java index 3781328ee1c..a6c6d65248a 100644 --- a/src/main/java/org/jabref/gui/importer/ImportInspectionDialog.java +++ b/src/main/java/org/jabref/gui/importer/ImportInspectionDialog.java @@ -75,6 +75,7 @@ import org.jabref.gui.undo.UndoableRemoveEntry; import org.jabref.gui.util.comparator.IconComparator; import org.jabref.gui.util.component.CheckBoxMessage; +import org.jabref.logic.bibtex.DuplicateCheck; import org.jabref.logic.bibtex.comparator.FieldComparator; import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil; import org.jabref.logic.help.HelpFile; @@ -83,7 +84,6 @@ import org.jabref.logic.l10n.Localization; import org.jabref.logic.util.UpdateField; import org.jabref.model.Defaults; 
-import org.jabref.model.DuplicateCheck; import org.jabref.model.FieldChange; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; diff --git a/src/main/java/org/jabref/gui/importer/UnlinkedFilesCrawler.java b/src/main/java/org/jabref/gui/importer/UnlinkedFilesCrawler.java index fe409ff66c7..f3b456fa937 100644 --- a/src/main/java/org/jabref/gui/importer/UnlinkedFilesCrawler.java +++ b/src/main/java/org/jabref/gui/importer/UnlinkedFilesCrawler.java @@ -54,7 +54,7 @@ public CheckableTreeNode searchDirectory(File directory, FileFilter filter) { * resolve its recursion and return what it has saved so far. */ public CheckableTreeNode searchDirectory(File directory, UnlinkedPDFFileFilter ff, AtomicBoolean state, ChangeListener changeListener) { - /* Cancelation of the search from outside! */ + /* Cancellation of the search from outside! */ if ((state == null) || !state.get()) { return null; } diff --git a/src/main/java/org/jabref/gui/importer/fetcher/IdBasedEntryFetcher.java b/src/main/java/org/jabref/gui/importer/fetcher/IdBasedEntryFetcher.java deleted file mode 100644 index 13e93e64d4e..00000000000 --- a/src/main/java/org/jabref/gui/importer/fetcher/IdBasedEntryFetcher.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.jabref.gui.importer.fetcher; - -import java.util.Objects; -import java.util.Optional; - -import javax.swing.JPanel; - -import org.jabref.gui.importer.ImportInspectionDialog; -import org.jabref.logic.help.HelpFile; -import org.jabref.logic.importer.FetcherException; -import org.jabref.logic.importer.IdBasedFetcher; -import org.jabref.logic.importer.ImportInspector; -import org.jabref.logic.importer.OutputPrinter; -import org.jabref.logic.l10n.Localization; -import org.jabref.model.entry.BibEntry; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -public class IdBasedEntryFetcher implements EntryFetcher { - - private static final Log LOGGER = 
LogFactory.getLog(IdBasedEntryFetcher.class); - private final IdBasedFetcher fetcher; - - public IdBasedEntryFetcher(IdBasedFetcher fetcher) { - this.fetcher = Objects.requireNonNull(fetcher); - } - - @Override - public boolean processQuery(String query, ImportInspector inspector, OutputPrinter status) { - - status.setStatus(Localization.lang("Processing %0", query)); - try { - Optional match = fetcher.performSearchById(query); - match.ifPresent(inspector::addEntry); - return match.isPresent(); - } catch (FetcherException e) { - LOGGER.error("Error while fetching from " + getTitle(), e); - ((ImportInspectionDialog)inspector).showErrorMessage(this.getTitle(), e.getLocalizedMessage()); - } - - return false; - } - - @Override - public String getTitle() { - return fetcher.getName(); - } - - @Override - public HelpFile getHelpPage() { - return fetcher.getHelpPage(); - } - - @Override - public JPanel getOptionsPanel() { - // not supported - return null; - } - - @Override - public void stopFetching() { - // not supported - } -} diff --git a/src/main/java/org/jabref/model/DuplicateCheck.java b/src/main/java/org/jabref/logic/bibtex/DuplicateCheck.java similarity index 86% rename from src/main/java/org/jabref/model/DuplicateCheck.java rename to src/main/java/org/jabref/logic/bibtex/DuplicateCheck.java index de68ddfd6a9..b0773a11c37 100644 --- a/src/main/java/org/jabref/model/DuplicateCheck.java +++ b/src/main/java/org/jabref/logic/bibtex/DuplicateCheck.java @@ -1,4 +1,4 @@ -package org.jabref.model; +package org.jabref.logic.bibtex; import java.util.HashMap; import java.util.HashSet; @@ -8,6 +8,8 @@ import java.util.Optional; import java.util.Set; +import org.jabref.logic.util.strings.StringSimilarity; +import org.jabref.model.EntryTypes; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.AuthorList; @@ -24,12 +26,10 @@ * This class contains utility method for duplicate checking of entries. 
*/ public class DuplicateCheck { - private static final Log LOGGER = LogFactory.getLog(DuplicateCheck.class); /* * Integer values for indicating result of duplicate check (for entries): - * */ private static final int NOT_EQUAL = 0; private static final int EQUAL = 1; @@ -47,7 +47,6 @@ public class DuplicateCheck { // Extra weighting of those fields that are most likely to provide correct duplicate detection: private static final Map FIELD_WEIGHTS = new HashMap<>(); - static { DuplicateCheck.FIELD_WEIGHTS.put(FieldName.AUTHOR, 2.5); DuplicateCheck.FIELD_WEIGHTS.put(FieldName.EDITOR, 2.5); @@ -55,8 +54,7 @@ public class DuplicateCheck { DuplicateCheck.FIELD_WEIGHTS.put(FieldName.JOURNAL, 2.); } - private DuplicateCheck() { - } + private DuplicateCheck() {} /** * Checks if the two entries represent the same publication. @@ -66,13 +64,17 @@ private DuplicateCheck() { * @return boolean */ public static boolean isDuplicate(BibEntry one, BibEntry two, BibDatabaseMode bibDatabaseMode) { + // same identifier + if (hasSameIdentifier(one, two)) { + return true; + } - // First check if they are of the same type - a necessary condition: + // same entry type if (!one.getType().equals(two.getType())) { return false; } - EntryType type = EntryTypes.getTypeOrDefault(one.getType(), bibDatabaseMode); + EntryType type = EntryTypes.getTypeOrDefault(one.getType(), bibDatabaseMode); // The check if they have the same required fields: List var = type.getRequiredFieldsFlat(); double[] req; @@ -96,6 +98,15 @@ public static boolean isDuplicate(BibEntry one, BibEntry two, BibDatabaseMode bi return req[0] >= DuplicateCheck.duplicateThreshold; } + private static boolean hasSameIdentifier(BibEntry one, BibEntry two) { + for (String name : FieldName.getIdentifierFieldNames()) { + if (one.getField(name).isPresent() && one.getField(name).equals(two.getField(name))) { + return true; + } + } + return false; + } + private static double[] compareFieldSet(List fields, BibEntry one, BibEntry two) { 
double res = 0; double totWeights = 0.; @@ -249,51 +260,18 @@ private static double similarity(String s1, String s2) { String longer = s1; String shorter = s2; - if (s1.length() < s2.length()) { // longer should always have greater length + if (s1.length() < s2.length()) { longer = s2; shorter = s1; } + int longerLength = longer.length(); + // both strings are zero length if (longerLength == 0) { return 1.0; - /* both strings are zero length */ } - double sim = (longerLength - editDistance(longer, shorter)) / (double) longerLength; + } + double sim = (longerLength - new StringSimilarity().editDistanceIgnoreCase(longer, shorter)) / (double) longerLength; LOGGER.debug("Longer string: " + longer + " Shorter string: " + shorter + " Similarity: " + sim); return sim; - - } - - /* - * Levenshtein Edit Distance - * http://stackoverflow.com/questions/955110/similarity-string-comparison-in-java - */ - private static int editDistance(String s1, String s2) { - String s1LowerCase = s1.toLowerCase(Locale.ROOT); - String s2LowerCase = s2.toLowerCase(Locale.ROOT); - - int[] costs = new int[s2LowerCase.length() + 1]; - for (int i = 0; i <= s1LowerCase.length(); i++) { - int lastValue = i; - for (int j = 0; j <= s2LowerCase.length(); j++) { - if (i == 0) { - costs[j] = j; - } else if (j > 0) { - int newValue = costs[j - 1]; - if (s1LowerCase.charAt(i - 1) != s2LowerCase.charAt(j - 1)) { - newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1; - } - costs[j - 1] = lastValue; - lastValue = newValue; - - } - } - if (i > 0) { - costs[s2LowerCase.length()] = lastValue; - } - } - LOGGER.debug("String 1: " + s1LowerCase + " String 2: " + s2LowerCase + " Distance: " + costs[s2LowerCase.length()]); - return costs[s2LowerCase.length()]; } - - } diff --git a/src/main/java/org/jabref/logic/importer/IdParserFetcher.java b/src/main/java/org/jabref/logic/importer/IdParserFetcher.java index 89d5a5568d0..f9608feac73 100644 --- a/src/main/java/org/jabref/logic/importer/IdParserFetcher.java 
+++ b/src/main/java/org/jabref/logic/importer/IdParserFetcher.java @@ -89,6 +89,7 @@ default Optional findIdentifier(BibEntry entry) throws FetcherException { return Optional.empty(); } catch (IOException e) { // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource + // TODO catch 503 service unavailable and alert user throw new FetcherException("An I/O exception occurred", e); } catch (ParseException e) { throw new FetcherException("An internal parser error occurred", e); diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java index 5e4c3903531..3f17abbaee4 100644 --- a/src/main/java/org/jabref/logic/importer/WebFetchers.java +++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java @@ -94,9 +94,10 @@ public static List getEntryBasedFetchers(ImportFormatPreferen return list; } - public static List getIdFetchers() { + public static List getIdFetchers(ImportFormatPreferences importFormatPreferences) { ArrayList list = new ArrayList<>(); list.add(new CrossRef()); + list.add(new ArXiv(importFormatPreferences)); list.sort(Comparator.comparing(WebFetcher::getName)); return list; } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java index 5575d0f1602..af617d5c289 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java @@ -20,6 +20,7 @@ import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.FulltextFetcher; import org.jabref.logic.importer.IdBasedFetcher; +import org.jabref.logic.importer.IdFetcher; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.logic.importer.util.OAI2Handler; @@ -32,6 +33,7 @@ import org.jabref.model.entry.identifier.ArXivIdentifier; import 
org.jabref.model.entry.identifier.DOI; import org.jabref.model.strings.StringUtil; +import org.jabref.model.util.OptionalUtil; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -51,7 +53,7 @@ * arxiv2bib which is live * dspace-portalmec */ -public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher { +public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher { private static final Log LOGGER = LogFactory.getLog(ArXiv.class); private static final String API_URL = "http://export.arxiv.org/api/query"; @@ -65,51 +67,23 @@ public ArXiv(ImportFormatPreferences importFormatPreferences) { @Override public Optional findFullText(BibEntry entry) throws IOException { Objects.requireNonNull(entry); - Optional pdfUrl = Optional.empty(); - // 1. Eprint - Optional identifier = entry.getField(FieldName.EPRINT); - if (StringUtil.isNotBlank(identifier)) { - try { - // Get pdf of entry with the specified id - pdfUrl = searchForEntryById(identifier.get()).flatMap(ArXivEntry::getPdfUrl); - if (pdfUrl.isPresent()) { - LOGGER.info("Fulltext PDF found @ arXiv."); - return pdfUrl; - } - } catch (FetcherException e) { - LOGGER.warn("arXiv eprint API request failed", e); - } - } - - // 2. 
DOI - Optional doi = entry.getField(FieldName.DOI).flatMap(DOI::build); - if (doi.isPresent()) { - String doiString = doi.get().getDOI(); - // Search for an entry in the ArXiv which is linked to the doi - try { - Optional arxivEntry = searchForEntry("doi:" + doiString); - - if (arxivEntry.isPresent()) { - // Check if entry is a match - StringSimilarity match = new StringSimilarity(); - String arxivTitle = arxivEntry.get().title.orElse(""); - String entryTitle = entry.getField(FieldName.TITLE).orElse(""); - - if (match.isSimilar(arxivTitle, entryTitle)) { - pdfUrl = arxivEntry.get().getPdfUrl(); - if (pdfUrl.isPresent()) { - LOGGER.info("Fulltext PDF found @ arXiv."); - return pdfUrl; - } - } - } - } catch (FetcherException e) { - LOGGER.warn("arXiv DOI API request failed", e); + try { + Optional pdfUrl = searchForEntries(entry).stream() + .map(ArXivEntry::getPdfUrl) + .filter(Optional::isPresent) + .map(Optional::get) + .findFirst(); + + if (pdfUrl.isPresent()) { + LOGGER.info("Fulltext PDF found @ arXiv."); } + return pdfUrl; + } catch (FetcherException e) { + LOGGER.warn("arXiv API request failed", e); } - return pdfUrl; + return Optional.empty(); } private Optional searchForEntry(String searchQuery) throws FetcherException { @@ -135,6 +109,47 @@ private Optional searchForEntryById(String id) throws FetcherExcepti } } + private List searchForEntries(BibEntry entry) throws FetcherException { + // 1. Eprint + Optional identifier = entry.getField(FieldName.EPRINT); + if (StringUtil.isNotBlank(identifier)) { + try { + // Get pdf of entry with the specified id + return OptionalUtil.toList(searchForEntryById(identifier.get())); + } catch (FetcherException e) { + LOGGER.warn("arXiv eprint API request failed", e); + } + } + + // 2. 
DOI and other fields + String query; + + Optional doi = entry.getField(FieldName.DOI).flatMap(DOI::build).map(DOI::getNormalized); + if (doi.isPresent()) { + // Search for an entry in the ArXiv which is linked to the doi + query = "doi:" + doi.get(); + } else { + Optional authorQuery = entry.getField(FieldName.AUTHOR).map(author -> "au:" + author); + Optional titleQuery = entry.getField(FieldName.TITLE).map(title -> "ti:" + title); + query = OptionalUtil.toList(authorQuery, titleQuery).stream().collect(Collectors.joining("+AND+")); + } + + Optional arxivEntry = searchForEntry(query); + + if (arxivEntry.isPresent()) { + // Check if entry is a match + StringSimilarity match = new StringSimilarity(); + String arxivTitle = arxivEntry.get().title.orElse(""); + String entryTitle = entry.getField(FieldName.TITLE).orElse(""); + + if (match.isSimilar(arxivTitle, entryTitle)) { + return OptionalUtil.toList(arxivEntry); + } + } + + return Collections.emptyList(); + } + private List searchForEntries(String searchQuery) throws FetcherException { return queryApi(searchQuery, Collections.emptyList(), 0, 10); } @@ -242,6 +257,19 @@ public Optional performSearchById(String identifier) throws FetcherExc (arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator())); } + @Override + public Optional findIdentifier(BibEntry entry) throws FetcherException { + return searchForEntries(entry).stream() + .map(ArXivEntry::getId) + .filter(Optional::isPresent) + .map(Optional::get) + .findFirst(); + } + + @Override + public String getIdentifierName() { + return "ArXiv"; + } private static class ArXivEntry { @@ -326,7 +354,7 @@ public Optional getPdfUrl() { /** * Returns the arXiv identifier */ - public Optional getId() { + public Optional getIdString() { // remove leading http://arxiv.org/abs/ from abstract url to get arXiv ID String prefix = "http://arxiv.org/abs/"; return urlAbstractPage.map(abstractUrl -> { @@ -338,6 +366,10 @@ public Optional getId() { }); } + 
public Optional getId() { + return getIdString().flatMap(ArXivIdentifier::parse); + } + /** * Returns the date when the first version was put on the arXiv */ @@ -358,7 +390,7 @@ public BibEntry toBibEntry(Character keywordDelimiter) { bibEntry.setField(FieldName.EPRINTTYPE, "arXiv"); bibEntry.setField(FieldName.AUTHOR, String.join(" and ", authorNames)); bibEntry.addKeywords(categories, keywordDelimiter); - getId().ifPresent(id -> bibEntry.setField(FieldName.EPRINT, id)); + getIdString().ifPresent(id -> bibEntry.setField(FieldName.EPRINT, id)); title.ifPresent(titleContent -> bibEntry.setField(FieldName.TITLE, titleContent)); doi.ifPresent(doiContent -> bibEntry.setField(FieldName.DOI, doiContent)); abstractText.ifPresent(abstractContent -> bibEntry.setField(FieldName.ABSTRACT, abstractContent)); diff --git a/src/main/java/org/jabref/logic/util/io/RegExpFileSearch.java b/src/main/java/org/jabref/logic/util/io/RegExpFileSearch.java index 0b43ee0b2cd..ec6a5e72efc 100644 --- a/src/main/java/org/jabref/logic/util/io/RegExpFileSearch.java +++ b/src/main/java/org/jabref/logic/util/io/RegExpFileSearch.java @@ -21,7 +21,6 @@ import org.apache.commons.logging.LogFactory; public class RegExpFileSearch { - private static final Log LOGGER = LogFactory.getLog(RegExpFileSearch.class); private static final String EXT_MARKER = "__EXTENSION__"; @@ -120,7 +119,6 @@ private static List findFile(BibEntry entry, List dirs, String file, /** * Internal Version of findFile, which also accepts a current directory to * base the search on. 
- * */ private static List findFile(BibEntry entry, String directory, String file, String extensionRegExp, Character keywordDelimiter) { diff --git a/src/main/java/org/jabref/logic/util/strings/StringSimilarity.java b/src/main/java/org/jabref/logic/util/strings/StringSimilarity.java index c17443f07fb..2b83e2b9a8d 100644 --- a/src/main/java/org/jabref/logic/util/strings/StringSimilarity.java +++ b/src/main/java/org/jabref/logic/util/strings/StringSimilarity.java @@ -6,7 +6,7 @@ public class StringSimilarity { private final Levenshtein METRIC_DISTANCE = new Levenshtein(); - // edit distance threshold for entry title comnparison + // edit distance threshold for entry title comparison private final int METRIC_THRESHOLD = 4; /** @@ -20,8 +20,8 @@ public boolean isSimilar(String a, String b) { return editDistanceIgnoreCase(a, b) <= METRIC_THRESHOLD; } - private double editDistanceIgnoreCase(String a, String b) { - // TODO: locale is dependent on the language of the strings?! + public double editDistanceIgnoreCase(String a, String b) { + // TODO: Locale is dependent on the language of the strings. English is a good denominator. 
return METRIC_DISTANCE.distance(a.toLowerCase(Locale.ENGLISH), b.toLowerCase(Locale.ENGLISH)); } } diff --git a/src/main/java/org/jabref/model/entry/FieldName.java b/src/main/java/org/jabref/model/entry/FieldName.java index b15b8b5f513..78348cafcf5 100644 --- a/src/main/java/org/jabref/model/entry/FieldName.java +++ b/src/main/java/org/jabref/model/entry/FieldName.java @@ -1,6 +1,5 @@ package org.jabref.model.entry; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -191,10 +190,12 @@ public static String getDisplayName(String field) { return StringUtil.capitalizeFirst(field); } - public static ArrayList getNotTextFieldNames() { - ArrayList notTextFieldNames = new ArrayList<>(); - notTextFieldNames.addAll(Arrays.asList(FieldName.DOI, FieldName.FILE, FieldName.URL, FieldName.URI, FieldName.ISBN, FieldName.ISSN, FieldName.MONTH, FieldName.DATE, FieldName.YEAR)); - return notTextFieldNames; + public static List getNotTextFieldNames() { + return Arrays.asList(FieldName.DOI, FieldName.FILE, FieldName.URL, FieldName.URI, FieldName.ISBN, FieldName.ISSN, FieldName.MONTH, FieldName.DATE, FieldName.YEAR); + } + + public static List getIdentifierFieldNames() { + return Arrays.asList(FieldName.DOI, FieldName.EPRINT, FieldName.PMID); } } diff --git a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java index 19c49bfb979..3c73f7064e5 100644 --- a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java +++ b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java @@ -3,7 +3,9 @@ import java.util.Objects; import java.util.Optional; -public class ArXivIdentifier { +import org.jabref.model.entry.FieldName; + +public class ArXivIdentifier implements Identifier { private final String identifier; @@ -31,6 +33,12 @@ public int hashCode() { return identifier.hashCode(); } + @Override + public String getDefaultField() { + 
return FieldName.EPRINT; + } + + @Override public String getNormalized() { return identifier; } diff --git a/src/test/java/org/jabref/CodeStyleTests.java b/src/test/java/org/jabref/CodeStyleTests.java index 3d61a5bbe27..7a1cef60a65 100644 --- a/src/test/java/org/jabref/CodeStyleTests.java +++ b/src/test/java/org/jabref/CodeStyleTests.java @@ -19,6 +19,6 @@ public void StringUtilClassIsSmall() throws Exception { Assert.assertTrue("StringUtil increased in size. " + "We try to keep this class as small as possible. " - + "Thus think twice if you add something to StringUtil.", lineCount <= 709); + + "Thus think twice if you add something to StringUtil.", lineCount <= 715); } } diff --git a/src/test/java/org/jabref/model/DuplicateCheckTest.java b/src/test/java/org/jabref/logic/bibtex/DuplicateCheckTest.java similarity index 53% rename from src/test/java/org/jabref/model/DuplicateCheckTest.java rename to src/test/java/org/jabref/logic/bibtex/DuplicateCheckTest.java index c5a68e3b45d..d1c56661fbc 100644 --- a/src/test/java/org/jabref/model/DuplicateCheckTest.java +++ b/src/test/java/org/jabref/logic/bibtex/DuplicateCheckTest.java @@ -1,9 +1,12 @@ -package org.jabref.model; +package org.jabref.logic.bibtex; import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BibtexEntryType; import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.model.entry.FieldName; +import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -12,6 +15,21 @@ public class DuplicateCheckTest { + private BibEntry simpleArticle; + private BibEntry unrelatedArticle; + + @Before + public void setUp() { + simpleArticle = new BibEntry(BibtexEntryTypes.ARTICLE.getName()) + .withField(FieldName.AUTHOR, "Single Author") + .withField(FieldName.TITLE, "A serious paper about something") + .withField(FieldName.YEAR, "2017"); + unrelatedArticle = new BibEntry(BibtexEntryTypes.ARTICLE.getName()) + 
.withField(FieldName.AUTHOR, "Completely Different") + .withField(FieldName.TITLE, "Holy Moly Uffdada und Trallalla") + .withField(FieldName.YEAR, "1992"); + } + @Test public void testDuplicateDetection() { BibEntry one = new BibEntry(BibtexEntryTypes.ARTICLE.getName()); @@ -79,4 +97,50 @@ public void testWordCorrelation() { assertEquals(0.78, (DuplicateCheck.correlateByWords(d2, d3)), 0.01); } + @Test + public void twoUnrelatedEntriesAreNoDuplicates() { + assertFalse(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); + } + + @Test + public void twoUnrelatedEntriesWithDifferentDoisAreNoDuplicates() { + simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); + unrelatedArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.00X"); + + assertFalse(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); + } + + @Test + public void twoUnrelatedEntriesWithEqualDoisAreDuplicates() { + simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); + unrelatedArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); + + assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); + } + + @Test + public void twoUnrelatedEntriesWithEqualPmidAreDuplicates() { + simpleArticle.setField(FieldName.PMID, "12345678"); + unrelatedArticle.setField(FieldName.PMID, "12345678"); + + assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); + } + + @Test + public void twoUnrelatedEntriesWithEqualEprintAreDuplicates() { + simpleArticle.setField(FieldName.EPRINT, "12345678"); + unrelatedArticle.setField(FieldName.EPRINT, "12345678"); + + assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); + } + + @Test + public void twoEntriesWithSameDoiButDifferentTypesAreDuplicates() { + simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); + BibEntry duplicateWithDifferentType = (BibEntry) 
simpleArticle.clone(); + duplicateWithDifferentType.setType(BibtexEntryTypes.INCOLLECTION); + + assertTrue(DuplicateCheck.isDuplicate(simpleArticle, duplicateWithDifferentType, BibDatabaseMode.BIBTEX)); + } + } diff --git a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java index 7f179ef5fc5..0d09defd4a1 100644 --- a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java +++ b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java @@ -60,7 +60,7 @@ public void getSearchBasedFetchersReturnsAllFetcherDerivingFromSearchBasedFetche @Test public void getIdFetchersReturnsAllFetcherDerivingFromIdFetcher() throws Exception { - List idFetchers = WebFetchers.getIdFetchers(); + List idFetchers = WebFetchers.getIdFetchers(importFormatPreferences); Set> expected = reflections.getSubTypesOf(IdFetcher.class); expected.remove(IdParserFetcher.class); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java index 47af5898eea..1e6a4545f6e 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java @@ -10,6 +10,7 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BiblatexEntryTypes; import org.jabref.model.entry.FieldName; +import org.jabref.model.entry.identifier.ArXivIdentifier; import org.jabref.testutils.category.FetcherTests; import org.junit.Assert; @@ -52,39 +53,40 @@ public void setUp() { } @Test - public void noIdentifierPresent() throws IOException { + public void findFullTextForEmptyEntryResultsEmptyOptional() throws IOException { assertEquals(Optional.empty(), finder.findFullText(entry)); } @Test(expected = NullPointerException.class) - public void rejectNullParameter() throws IOException { + public void findFullTextRejectsNullParameter() throws IOException { finder.findFullText(null); Assert.fail(); } 
@Test - public void findByDOI() throws IOException { + public void findFullTextByDOI() throws IOException { entry.setField(FieldName.DOI, "10.1529/biophysj.104.047340"); entry.setField(FieldName.TITLE, "Pause Point Spectra in DNA Constant-Force Unzipping"); assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + } @Test - public void findByEprint() throws IOException { + public void findFullTextByEprint() throws IOException { entry.setField("eprint", "1603.06570"); assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry)); } @Test - public void findByEprintWithPrefix() throws IOException { + public void findFullTextByEprintWithPrefix() throws IOException { entry.setField("eprint", "arXiv:1603.06570"); assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry)); } @Test - public void findByEprintWithUnknownDOI() throws IOException { + public void findFullTextByEprintWithUnknownDOI() throws IOException { entry.setField("doi", "10.1529/unknown"); entry.setField("eprint", "1603.06570"); @@ -92,21 +94,36 @@ public void findByEprintWithUnknownDOI() throws IOException { } @Test - public void notFoundByUnknownDOI() throws IOException { + public void findFullTextByTitle() throws IOException { + entry.setField("title", "Pause Point Spectra in DNA Constant-Force Unzipping"); + + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + } + + @Test + public void findFullTextByTitleAndPartOfAuthor() throws IOException { + entry.setField("title", "Pause Point Spectra in DNA Constant-Force Unzipping"); + entry.setField("author", "Weeks and Lucks"); + + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + } + + @Test + public void notFindFullTextByUnknownDOI() throws IOException { entry.setField("doi", "10.1529/unknown"); 
assertEquals(Optional.empty(), finder.findFullText(entry)); } @Test - public void notFoundByUnknownId() throws IOException { + public void notFindFullTextByUnknownId() throws IOException { entry.setField("eprint", "1234.12345"); assertEquals(Optional.empty(), finder.findFullText(entry)); } @Test - public void findByDOINotAvailableInCatalog() throws IOException { + public void findFullTextByDOINotAvailableInCatalog() throws IOException { entry.setField(FieldName.DOI, "10.1016/0370-2693(77)90015-6"); entry.setField(FieldName.TITLE, "Superspace formulation of supergravity"); @@ -173,4 +190,11 @@ public void searchWithMalformedIdThrowsException() throws Exception { expectedException.expectMessage("incorrect id format"); finder.performSearchById("123412345"); } + + @Test + public void searchIdentifierForSlicePaper() throws Exception { + sliceTheoremPaper.clearField(FieldName.EPRINT); + + assertEquals(ArXivIdentifier.parse("1405.2249v1"), finder.findIdentifier(sliceTheoremPaper)); + } }