Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ArXiv identifier batch lookup #2710

Merged
merged 1 commit into from
Apr 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- The `Move linked files to default file directory`-Cleanup operation respects the `File directory pattern` setting
- We separated the `Move file` and `Rename Pdfs` logic and context menu entries in the `General`-Tab for the Field `file` to improve the semantics
- A scrollbar was added to the cleanup panel, as a result of issue [#2501](https://github.com/JabRef/jabref/issues/2501)
- Using "Look up document identifier" in the quality menu, it is possible to look up DOIs and other identifiers for multiple entries.
- Using "Look up document identifier" in the quality menu, it is possible to look up DOIs, ArXiv ids and other identifiers for multiple entries.
- F4 opens selected file in current JTable context not just from selected entry inside the main table [#2355](https://github.com/JabRef/jabref/issues/2355)
- We added an option to copy the title of BibTeX entries to the clipboard through `Edit -> Copy title` (implements [#210](https://github.com/koppor/jabref/issues/210))
- Several scrollbars were added to the preference dialog which show up when content is too large [#2559](https://github.com/JabRef/jabref/issues/2559)
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/gui/JabRefFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ private void fillMenu() {
quality.add(findUnlinkedFiles);
quality.add(autoLinkFile);

for (IdFetcher fetcher : WebFetchers.getIdFetchers()) {
for (IdFetcher fetcher : WebFetchers.getIdFetchers(Globals.prefs.getImportFormatPreferences())) {
lookupIdentifiers.add(new LookupIdentifierAction(this, fetcher));
}
quality.add(lookupIdentifiers);
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,10 @@ public static List<EntryBasedFetcher> getEntryBasedFetchers(ImportFormatPreferen
return list;
}

public static List<IdFetcher> getIdFetchers() {
public static List<IdFetcher> getIdFetchers(ImportFormatPreferences importFormatPreferences) {
ArrayList<IdFetcher> list = new ArrayList<>();
list.add(new CrossRef());
list.add(new ArXiv(importFormatPreferences));
list.sort(Comparator.comparing(WebFetcher::getName));
return list;
}
Expand Down
120 changes: 76 additions & 44 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.IdFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.util.OAI2Handler;
Expand All @@ -32,6 +33,7 @@
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.OptionalUtil;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
Expand All @@ -51,7 +53,7 @@
* <a href="https://github.com/nathangrigg/arxiv2bib">arxiv2bib</a> which is <a href="https://arxiv2bibtex.org/">live</a>
* <a href="https://gitlab.c3sl.ufpr.br/portalmec/dspace-portalmec/blob/aa209d15082a9870f9daac42c78a35490ce77b52/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java">dspace-portalmec</a>
*/
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher {
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {
private static final Log LOGGER = LogFactory.getLog(ArXiv.class);

private static final String API_URL = "http://export.arxiv.org/api/query";
Expand All @@ -65,51 +67,23 @@ public ArXiv(ImportFormatPreferences importFormatPreferences) {
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
Objects.requireNonNull(entry);
Optional<URL> pdfUrl = Optional.empty();

// 1. Eprint
Optional<String> identifier = entry.getField(FieldName.EPRINT);
if (StringUtil.isNotBlank(identifier)) {
try {
// Get pdf of entry with the specified id
pdfUrl = searchForEntryById(identifier.get()).flatMap(ArXivEntry::getPdfUrl);
if (pdfUrl.isPresent()) {
LOGGER.info("Fulltext PDF found @ arXiv.");
return pdfUrl;
}
} catch (FetcherException e) {
LOGGER.warn("arXiv eprint API request failed", e);
}
}

// 2. DOI
Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::build);
if (doi.isPresent()) {
String doiString = doi.get().getDOI();
// Search for an entry in the ArXiv which is linked to the doi
try {
Optional<ArXivEntry> arxivEntry = searchForEntry("doi:" + doiString);

if (arxivEntry.isPresent()) {
// Check if entry is a match
StringSimilarity match = new StringSimilarity();
String arxivTitle = arxivEntry.get().title.orElse("");
String entryTitle = entry.getField(FieldName.TITLE).orElse("");

if (match.isSimilar(arxivTitle, entryTitle)) {
pdfUrl = arxivEntry.get().getPdfUrl();
if (pdfUrl.isPresent()) {
LOGGER.info("Fulltext PDF found @ arXiv.");
return pdfUrl;
}
}
}
} catch (FetcherException e) {
LOGGER.warn("arXiv DOI API request failed", e);
try {
Optional<URL> pdfUrl = searchForEntries(entry).stream()
.map(ArXivEntry::getPdfUrl)
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();

if (pdfUrl.isPresent()) {
LOGGER.info("Fulltext PDF found @ arXiv.");
}
return pdfUrl;
} catch (FetcherException e) {
LOGGER.warn("arXiv API request failed", e);
}

return pdfUrl;
return Optional.empty();
}

private Optional<ArXivEntry> searchForEntry(String searchQuery) throws FetcherException {
Expand All @@ -135,6 +109,47 @@ private Optional<ArXivEntry> searchForEntryById(String id) throws FetcherExcepti
}
}

/**
 * Looks up the arXiv entry that corresponds to the given bibliographic entry.
 * <p>
 * The lookup is attempted in this order:
 * <ol>
 *   <li>by the {@code eprint} field, if it is not blank;</li>
 *   <li>by the {@code doi} field, if it holds a parsable DOI;</li>
 *   <li>otherwise by the {@code author} and/or {@code title} fields.</li>
 * </ol>
 * A result found via step 2 or 3 is only returned when its title is similar to the title of {@code entry}.
 *
 * @param entry the entry to find on the arXiv
 * @return a list with the matching arXiv entry, or an empty list if nothing suitable was found
 * @throws FetcherException if the DOI/author/title query fails (a failing eprint query is only logged)
 */
private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherException {
    // 1. Eprint
    Optional<String> identifier = entry.getField(FieldName.EPRINT);
    if (StringUtil.isNotBlank(identifier)) {
        try {
            // Get the arXiv entry with the specified id
            return OptionalUtil.toList(searchForEntryById(identifier.get()));
        } catch (FetcherException e) {
            // Best effort: a failed eprint lookup falls through to the DOI / author / title search below
            LOGGER.warn("arXiv eprint API request failed", e);
        }
    }

    // 2. DOI and other fields
    String query;

    Optional<String> doi = entry.getField(FieldName.DOI).flatMap(DOI::build).map(DOI::getNormalized);
    if (doi.isPresent()) {
        // Search for an entry in the ArXiv which is linked to the doi
        query = "doi:" + doi.get();
    } else {
        Optional<String> authorQuery = entry.getField(FieldName.AUTHOR).map(author -> "au:" + author);
        Optional<String> titleQuery = entry.getField(FieldName.TITLE).map(title -> "ti:" + title);
        query = OptionalUtil.toList(authorQuery, titleQuery).stream().collect(Collectors.joining("+AND+"));
    }

    if (query.isEmpty()) {
        // Neither DOI nor author nor title is available: there is nothing to search for,
        // so do not send an empty query to the API.
        return Collections.emptyList();
    }

    Optional<ArXivEntry> arxivEntry = searchForEntry(query);

    if (arxivEntry.isPresent()) {
        // Check if entry is a match
        StringSimilarity match = new StringSimilarity();
        String arxivTitle = arxivEntry.get().title.orElse("");
        String entryTitle = entry.getField(FieldName.TITLE).orElse("");

        if (match.isSimilar(arxivTitle, entryTitle)) {
            return OptionalUtil.toList(arxivEntry);
        }
    }

    return Collections.emptyList();
}

private List<ArXivEntry> searchForEntries(String searchQuery) throws FetcherException {
return queryApi(searchQuery, Collections.emptyList(), 0, 10);
}
Expand Down Expand Up @@ -242,6 +257,19 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
(arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()));
}

/**
 * Finds the arXiv identifier for the given entry.
 *
 * @param entry the entry to look up on the arXiv
 * @return the identifier of the first found arXiv entry that has one, or an empty Optional
 * @throws FetcherException if the underlying arXiv search fails
 */
@Override
public Optional<ArXivIdentifier> findIdentifier(BibEntry entry) throws FetcherException {
    for (ArXivEntry candidate : searchForEntries(entry)) {
        Optional<ArXivIdentifier> candidateId = candidate.getId();
        if (candidateId.isPresent()) {
            // The first result carrying an identifier wins
            return candidateId;
        }
    }
    return Optional.empty();
}

/**
 * Returns the name of the identifier type this fetcher looks up.
 *
 * @return the constant string {@code "ArXiv"}
 */
@Override
public String getIdentifierName() {
    final String identifierName = "ArXiv";
    return identifierName;
}

private static class ArXivEntry {

Expand Down Expand Up @@ -326,7 +354,7 @@ public Optional<URL> getPdfUrl() {
/**
* Returns the arXiv identifier
*/
public Optional<String> getId() {
public Optional<String> getIdString() {
// remove leading http://arxiv.org/abs/ from abstract url to get arXiv ID
String prefix = "http://arxiv.org/abs/";
return urlAbstractPage.map(abstractUrl -> {
Expand All @@ -338,6 +366,10 @@ public Optional<String> getId() {
});
}

/**
 * Returns the arXiv identifier as a parsed {@link ArXivIdentifier}.
 *
 * @return the result of parsing {@link #getIdString()}, or an empty Optional if no id string is available
 */
public Optional<ArXivIdentifier> getId() {
    Optional<String> rawId = getIdString();
    if (!rawId.isPresent()) {
        return Optional.empty();
    }
    return ArXivIdentifier.parse(rawId.get());
}

/**
* Returns the date when the first version was put on the arXiv
*/
Expand All @@ -358,7 +390,7 @@ public BibEntry toBibEntry(Character keywordDelimiter) {
bibEntry.setField(FieldName.EPRINTTYPE, "arXiv");
bibEntry.setField(FieldName.AUTHOR, String.join(" and ", authorNames));
bibEntry.addKeywords(categories, keywordDelimiter);
getId().ifPresent(id -> bibEntry.setField(FieldName.EPRINT, id));
getIdString().ifPresent(id -> bibEntry.setField(FieldName.EPRINT, id));
title.ifPresent(titleContent -> bibEntry.setField(FieldName.TITLE, titleContent));
doi.ifPresent(doiContent -> bibEntry.setField(FieldName.DOI, doiContent));
abstractText.ifPresent(abstractContent -> bibEntry.setField(FieldName.ABSTRACT, abstractContent));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import java.util.Objects;
import java.util.Optional;

public class ArXivIdentifier {
import org.jabref.model.entry.FieldName;

public class ArXivIdentifier implements Identifier {

private final String identifier;

Expand Down Expand Up @@ -31,6 +33,12 @@ public int hashCode() {
return identifier.hashCode();
}

/**
 * Returns the name of the field associated with this identifier by default.
 *
 * @return {@link FieldName#EPRINT}
 */
@Override
public String getDefaultField() {
    final String defaultField = FieldName.EPRINT;
    return defaultField;
}

/**
 * Returns the normalized form of this identifier.
 *
 * @return the stored identifier string, unchanged
 */
@Override
public String getNormalized() {
    return this.identifier;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public void getSearchBasedFetchersReturnsAllFetcherDerivingFromSearchBasedFetche

@Test
public void getIdFetchersReturnsAllFetcherDerivingFromIdFetcher() throws Exception {
List<IdFetcher> idFetchers = WebFetchers.getIdFetchers();
List<IdFetcher> idFetchers = WebFetchers.getIdFetchers(importFormatPreferences);

Set<Class<? extends IdFetcher>> expected = reflections.getSubTypesOf(IdFetcher.class);
expected.remove(IdParserFetcher.class);
Expand Down
42 changes: 33 additions & 9 deletions src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BiblatexEntryTypes;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.testutils.category.FetcherTests;

import org.junit.Assert;
Expand Down Expand Up @@ -52,61 +53,77 @@ public void setUp() {
}

@Test
public void noIdentifierPresent() throws IOException {
public void findFullTextForEmptyEntryResultsEmptyOptional() throws IOException {
assertEquals(Optional.empty(), finder.findFullText(entry));
}

@Test(expected = NullPointerException.class)
public void rejectNullParameter() throws IOException {
public void findFullTextRejectsNullParameter() throws IOException {
finder.findFullText(null);
Assert.fail();
}

@Test
public void findByDOI() throws IOException {
public void findFullTextByDOI() throws IOException {
entry.setField(FieldName.DOI, "10.1529/biophysj.104.047340");
entry.setField(FieldName.TITLE, "Pause Point Spectra in DNA Constant-Force Unzipping");

assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry));

}

@Test
public void findByEprint() throws IOException {
public void findFullTextByEprint() throws IOException {
entry.setField("eprint", "1603.06570");

assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry));
}

@Test
public void findByEprintWithPrefix() throws IOException {
public void findFullTextByEprintWithPrefix() throws IOException {
entry.setField("eprint", "arXiv:1603.06570");
assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry));
}

@Test
public void findByEprintWithUnknownDOI() throws IOException {
public void findFullTextByEprintWithUnknownDOI() throws IOException {
entry.setField("doi", "10.1529/unknown");
entry.setField("eprint", "1603.06570");

assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry));
}

@Test
public void notFoundByUnknownDOI() throws IOException {
public void findFullTextByTitle() throws IOException {
entry.setField("title", "Pause Point Spectra in DNA Constant-Force Unzipping");

assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry));
}

@Test
public void findFullTextByTitleAndPartOfAuthor() throws IOException {
    // Only title and author are set (no eprint, no DOI), so the lookup has to rely on these two fields
    entry.setField("title", "Pause Point Spectra in DNA Constant-Force Unzipping");
    entry.setField("author", "Weeks and Lucks");

    Optional<URL> expectedPdf = Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1"));
    Optional<URL> actualPdf = finder.findFullText(entry);

    assertEquals(expectedPdf, actualPdf);
}

@Test
public void notFindFullTextByUnknownDOI() throws IOException {
    // Only a DOI is set; no full text is expected for it
    entry.setField("doi", "10.1529/unknown");

    Optional<URL> fullText = finder.findFullText(entry);

    assertEquals(Optional.empty(), fullText);
}

@Test
public void notFoundByUnknownId() throws IOException {
public void notFindFullTextByUnknownId() throws IOException {
entry.setField("eprint", "1234.12345");

assertEquals(Optional.empty(), finder.findFullText(entry));
}

@Test
public void findByDOINotAvailableInCatalog() throws IOException {
public void findFullTextByDOINotAvailableInCatalog() throws IOException {
entry.setField(FieldName.DOI, "10.1016/0370-2693(77)90015-6");
entry.setField(FieldName.TITLE, "Superspace formulation of supergravity");

Expand Down Expand Up @@ -173,4 +190,11 @@ public void searchWithMalformedIdThrowsException() throws Exception {
expectedException.expectMessage("incorrect id format");
finder.performSearchById("123412345");
}

@Test
public void searchIdentifierForSlicePaper() throws Exception {
    // Drop the eprint field so the identifier cannot simply be read back from the entry
    sliceTheoremPaper.clearField(FieldName.EPRINT);

    Optional<ArXivIdentifier> expectedId = ArXivIdentifier.parse("1405.2249v1");
    Optional<ArXivIdentifier> foundId = finder.findIdentifier(sliceTheoremPaper);

    assertEquals(expectedId, foundId);
}
}