Skip to content

Commit

Permalink
Fix arXiv fetcher tests (#7686)
Browse files Browse the repository at this point in the history
* Add some code comments and remove some empty lines

* Fix tests (arXiv does not use https URLs)

* Fix https

* Readability fixes
  • Loading branch information
koppor authored May 2, 2021
1 parent 0363863 commit 075901a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 25 deletions.
38 changes: 16 additions & 22 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.jabref.logic.cleanup.CleanupJob;
import org.jabref.logic.cleanup.EprintCleanup;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand Down Expand Up @@ -80,7 +79,6 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
.map(Optional::get)
.findFirst();
pdfUrl.ifPresent(url -> LOGGER.info("Fulltext PDF found @ arXiv."));

return pdfUrl;
} catch (FetcherException e) {
LOGGER.warn("arXiv API request failed", e);
Expand Down Expand Up @@ -117,11 +115,12 @@ private Optional<ArXivEntry> searchForEntryById(String id) throws FetcherExcepti
}
}

private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherException {
entry = (BibEntry) entry.clone();
CleanupJob cleanupJob = new EprintCleanup();
cleanupJob.cleanup(entry);
// 1. Eprint
private List<ArXivEntry> searchForEntries(BibEntry originalEntry) throws FetcherException {
// We need to clone the entry, because we modify it by a cleanup job.
final BibEntry entry = (BibEntry) originalEntry.clone();

// 1. Check for Eprint
new EprintCleanup().cleanup(entry);
Optional<String> identifier = entry.getField(StandardField.EPRINT);
if (StringUtil.isNotBlank(identifier)) {
try {
Expand All @@ -133,26 +132,21 @@ private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherExceptio
}

// 2. DOI and other fields
String query;

Optional<String> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse).map(DOI::getNormalized);
if (doi.isPresent()) {
// Search for an entry in the ArXiv which is linked to the doi
query = "doi:" + doi.get();
} else {
Optional<String> authorQuery = entry.getField(StandardField.AUTHOR).map(author -> "au:" + author);
Optional<String> titleQuery = entry.getField(StandardField.TITLE).map(title -> "ti:" + StringUtil.ignoreCurlyBracket(title));
query = OptionalUtil.toList(authorQuery, titleQuery).stream().collect(Collectors.joining("+AND+"));
}

String query = entry.getField(StandardField.DOI)
.flatMap(DOI::parse)
.map(DOI::getNormalized)
.map(doiString -> "doi:" + doiString)
.orElseGet(() -> {
Optional<String> authorQuery = entry.getField(StandardField.AUTHOR).map(author -> "au:" + author);
Optional<String> titleQuery = entry.getField(StandardField.TITLE).map(title -> "ti:" + StringUtil.ignoreCurlyBracket(title));
return String.join("+AND+", OptionalUtil.toList(authorQuery, titleQuery));
});
Optional<ArXivEntry> arxivEntry = searchForEntry(query);

if (arxivEntry.isPresent()) {
// Check if entry is a match
StringSimilarity match = new StringSimilarity();
String arxivTitle = arxivEntry.get().title.orElse("");
String entryTitle = StringUtil.ignoreCurlyBracket(entry.getField(StandardField.TITLE).orElse(""));

if (match.isSimilar(arxivTitle, entryTitle)) {
return OptionalUtil.toList(arxivEntry);
}
Expand All @@ -175,7 +169,7 @@ private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids,

/**
* Queries the API.
*
* <p>
* If only {@code searchQuery} is given, then the API will return results for each article that matches the query.
* If only {@code ids} is given, then the API will return results for each article in the list.
* If both {@code searchQuery} and {@code ids} are given, then the API will return each article in
Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/jabref/model/strings/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,6 @@ public static String boldHTML(String input) {
* Return string enclosed in HTML bold tags if not null, otherwise return alternative text in HTML bold tags
*/
public static String boldHTML(String input, String alternative) {

if (input == null) {
return "<b>" + alternative + "</b>";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void findFullTextByTitle() throws IOException {
void findFullTextByTitleWithCurlyBracket() throws IOException {
entry.setField(StandardField.TITLE, "Machine versus {Human} {Attention} in {Deep} {Reinforcement} {Learning} {Tasks}");

assertEquals(Optional.of(new URL("https://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
assertEquals(Optional.of(new URL("http://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
}

@Test
Expand Down Expand Up @@ -131,7 +131,7 @@ void findFullTextByTitleWithCurlyBracketAndPartOfAuthor() throws IOException {
entry.setField(StandardField.TITLE, "Machine versus {Human} {Attention} in {Deep} {Reinforcement} {Learning} {Tasks}");
entry.setField(StandardField.AUTHOR, "Zhang, Ruohan and Guo");

assertEquals(Optional.of(new URL("https://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
assertEquals(Optional.of(new URL("http://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
}

@Test
Expand Down

0 comments on commit 075901a

Please sign in to comment.