Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix fetcher tests #4216

Merged
merged 8 commits into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.xml.parsers.DocumentBuilder;
Expand Down Expand Up @@ -53,9 +55,12 @@
* <a href="https://gitlab.c3sl.ufpr.br/portalmec/dspace-portalmec/blob/aa209d15082a9870f9daac42c78a35490ce77b52/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java">dspace-portalmec</a>
*/
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {

private static final Logger LOGGER = LoggerFactory.getLogger(ArXiv.class);

private static final String API_URL = "https://export.arxiv.org/api/query";
private static final String ARXIV_URL_PREFIX_FOR_ID = "(https?://arxiv.org/abs/)";
private static final Pattern URL_PATTERN = Pattern.compile(ARXIV_URL_PREFIX_FOR_ID);

private final ImportFormatPreferences importFormatPreferences;

Expand All @@ -69,10 +74,10 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {

try {
Optional<URL> pdfUrl = searchForEntries(entry).stream()
.map(ArXivEntry::getPdfUrl)
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();
.map(ArXivEntry::getPdfUrl)
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();

if (pdfUrl.isPresent()) {
LOGGER.info("Fulltext PDF found @ arXiv.");
Expand Down Expand Up @@ -159,7 +164,7 @@ private List<ArXivEntry> searchForEntries(String searchQuery) throws FetcherExce
}

private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids, int start, int maxResults)
throws FetcherException {
throws FetcherException {
Document result = callApi(searchQuery, ids, start, maxResults);
List<Node> entries = XMLUtil.asList(result.getElementsByTagName("entry"));

Expand Down Expand Up @@ -195,7 +200,7 @@ private Document callApi(String searchQuery, List<ArXivIdentifier> ids, int star
}
if (!ids.isEmpty()) {
uriBuilder.addParameter("id_list",
ids.stream().map(ArXivIdentifier::getNormalized).collect(Collectors.joining(",")));
ids.stream().map(ArXivIdentifier::getNormalized).collect(Collectors.joining(",")));
}
uriBuilder.addParameter("start", String.valueOf(start));
uriBuilder.addParameter("max_results", String.valueOf(maxResults));
Expand Down Expand Up @@ -252,7 +257,8 @@ public HelpFile getHelpPage() {
/**
 * Runs the query through {@code searchForEntries} and converts each result into a {@link BibEntry},
 * using the keyword separator taken from the import format preferences.
 *
 * @param query the search query, handed unchanged to {@code searchForEntries}
 * @return the converted entries collected into a list
 * @throws FetcherException declared here; presumably raised by the underlying arXiv lookup (confirm)
 */
@Override
public List<BibEntry> performSearch(String query) throws FetcherException {
return searchForEntries(query).stream().map(
(arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator())).collect(Collectors.toList());
(arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
.collect(Collectors.toList());
}

@Override
Expand All @@ -266,10 +272,10 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
/**
 * Searches arXiv for the given entry and returns the identifier of the first result that carries one.
 *
 * @param entry the entry to look up via {@code searchForEntries}
 * @return the first present identifier, or an empty Optional when no result has one
 * @throws FetcherException declared here; presumably raised by the underlying arXiv lookup (confirm)
 */
@Override
public Optional<ArXivIdentifier> findIdentifier(BibEntry entry) throws FetcherException {
return searchForEntries(entry).stream()
.map(ArXivEntry::getId)
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();
.map(ArXivEntry::getId)
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();
}

@Override
Expand All @@ -290,7 +296,6 @@ private static class ArXivEntry {
private final Optional<String> journalReferenceText;
private final Optional<String> primaryCategory;


public ArXivEntry(Node item) {
// see https://arxiv.org/help/api/user-manual#_details_of_atom_results_returned

Expand Down Expand Up @@ -347,7 +352,7 @@ public ArXivEntry(Node item) {
// Primary category
// Ex: <arxiv:primary_category xmlns:arxiv="https://arxiv.org/schemas/atom" term="math-ph" scheme="http://arxiv.org/schemas/atom"/>
primaryCategory = XMLUtil.getNode(item, "arxiv:primary_category")
.flatMap(node -> XMLUtil.getAttributeContent(node, "term"));
.flatMap(node -> XMLUtil.getAttributeContent(node, "term"));
}

public static String correctLineBreaks(String s) {
Expand All @@ -367,14 +372,16 @@ public Optional<URL> getPdfUrl() {
* Returns the arXiv identifier
*/
public Optional<String> getIdString() {
// remove leading https://arxiv.org/abs/ from abstract url to get arXiv ID
String prefix = "https://arxiv.org/abs/";

return urlAbstractPage.map(abstractUrl -> {
if (abstractUrl.startsWith(prefix)) {
return abstractUrl.substring(prefix.length());
// NOTE(review): URL_PATTERN has no ^ anchor and find() accepts a match anywhere in the string,
// while the substring below cuts group(1).length() characters from index 0. If the prefix ever
// appears later in the value, the wrong characters are dropped; consider anchoring the pattern
// or cutting at matcher.end(). The dots in the pattern are also unescaped (they match any character).
Matcher matcher = URL_PATTERN.matcher(abstractUrl);
if (matcher.find()) {
// remove leading http(s)://arxiv.org/abs/ from abstract url to get arXiv ID
return abstractUrl.substring(matcher.group(1).length());
} else {
// no arXiv abstract-page prefix found: hand the value back unchanged
return abstractUrl;
}

});
}

Expand Down Expand Up @@ -409,8 +416,7 @@ public BibEntry toBibEntry(Character keywordDelimiter) {
getDate().ifPresent(date -> bibEntry.setField(FieldName.DATE, date));
primaryCategory.ifPresent(category -> bibEntry.setField(FieldName.EPRINTCLASS, category));
journalReferenceText.ifPresent(journal -> bibEntry.setField(FieldName.JOURNALTITLE, journal));
getPdfUrl().ifPresent(url -> bibEntry
.setFiles(Collections.singletonList(new LinkedFile(url, "PDF"))));
getPdfUrl().ifPresent(url -> bibEntry.setFiles(Collections.singletonList(new LinkedFile(url, "PDF"))));
return bibEntry;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ private void putDate(Map<String, String> fields, String elementName, DateDefinit

/**
 * Joins the list elements with the separator and stores the result in the field map under the key.
 * An empty list leaves the map untouched.
 *
 * @param fields    the map that receives the joined value
 * @param list      the values to join
 * @param key       the map key to store the joined value under
 * @param separator the text placed between consecutive values
 */
private void putIfListIsNotEmpty(Map<String, String> fields, List<String> list, String key, String separator) {
if (!list.isEmpty()) {
fields.put(key, Joiner.on(separator).join(list));
fields.put(key, list.stream().collect(Collectors.joining(separator)));
}
}

Expand Down
6 changes: 4 additions & 2 deletions src/main/java/org/jabref/logic/net/ProgressInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,18 @@
* Code based on http://stackoverflow.com/a/1339589/873661, but converted to use JavaFX properties instead of listeners
*/
public class ProgressInputStream extends FilterInputStream {

private final long maxNumBytes;
private final LongProperty totalNumBytesRead;
private final LongProperty progress;

/**
 * Wraps the given stream and sets up the properties used to report read progress.
 *
 * @param in          the stream to read from
 * @param maxNumBytes the expected total number of bytes, used as the divisor of the progress binding
 */
public ProgressInputStream(InputStream in, long maxNumBytes) {
super(in);
this.maxNumBytes = maxNumBytes;
this.totalNumBytesRead = new SimpleLongProperty(0);
this.progress = new SimpleLongProperty(0);
this.progress.bind(totalNumBytesRead.divide(maxNumBytes));

// Non-positive sizes are replaced by 1 so the divisor of the binding on the next line is never zero.
this.maxNumBytes = maxNumBytes <= 0 ? 1 : maxNumBytes;
this.progress.bind(totalNumBytesRead.divide(this.maxNumBytes));
}

public long getTotalNumBytesRead() {
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/org/jabref/logic/net/URLDownload.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -96,6 +97,7 @@ public static void bypassSSLVerification() {

// Create a trust manager that does not validate certificate chains
TrustManager[] trustAllCerts = {new X509TrustManager() {

@Override
public void checkClientTrusted(X509Certificate[] chain, String authType) {
// Intentionally empty: this trust manager does not validate certificate chains.
}
Expand Down Expand Up @@ -249,8 +251,14 @@ public void toFile(Path destination) throws IOException {
* Takes the web resource as the source for a monitored input stream.
*/
public ProgressInputStream asInputStream() throws IOException {
URLConnection urlConnection = this.openConnection();
long fileSize = urlConnection.getContentLength();
// NOTE(review): unconditional cast. openConnection() is declared to return URLConnection, so this
// throws ClassCastException if it ever yields a non-HTTP connection (e.g. a file: URL).
// Confirm that cannot happen, or guard with instanceof.
HttpURLConnection urlConnection = (HttpURLConnection) this.openConnection();

// 404 and 400 responses are logged and answered with an empty stream instead of an exception.
if ((urlConnection.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) || (urlConnection.getResponseCode() == HttpURLConnection.HTTP_BAD_REQUEST))
{
LOGGER.error("Response message {} returned for url {}", urlConnection.getResponseMessage(), urlConnection.getURL());
return new ProgressInputStream(new ByteArrayInputStream(new byte[0]), 0);
}
// NOTE(review): per the URLConnection docs the content length may be -1 when unknown — verify the
// ProgressInputStream constructor copes with that value.
long fileSize = urlConnection.getContentLengthLong();
return new ProgressInputStream(new BufferedInputStream(urlConnection.getInputStream()), fileSize);
}

Expand Down Expand Up @@ -311,8 +319,8 @@ private URLConnection openConnection() throws IOException {
int status = ((HttpURLConnection) connection).getResponseCode();
if (status != HttpURLConnection.HTTP_OK) {
if ((status == HttpURLConnection.HTTP_MOVED_TEMP)
|| (status == HttpURLConnection.HTTP_MOVED_PERM)
|| (status == HttpURLConnection.HTTP_SEE_OTHER)) {
|| (status == HttpURLConnection.HTTP_MOVED_PERM)
|| (status == HttpURLConnection.HTTP_SEE_OTHER)) {
// get redirect url from "location" header field
String newUrl = connection.getHeaderField("Location");
// open the new connection again
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ public static Optional<ArXivIdentifier> parse(String value) {

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (this == o) {
return true;
}
if ((o == null) || (getClass() != o.getClass())) {
return false;
}

ArXivIdentifier that = (ArXivIdentifier) o;

Expand All @@ -45,6 +49,11 @@ public String getNormalized() {
return identifier;
}

/**
 * Renders this identifier for debugging and test failure output.
 *
 * @return the text {@code ArXivIdentifier [identifier=...]} with the stored identifier inserted
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("ArXivIdentifier [identifier=");
    text.append(identifier).append("]");
    return text.toString();
}

@Override
public Optional<URI> getExternalURI() {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void findByDOI() throws IOException {
entry.setField("doi", "10.1021/bk-2006-STYG.ch014");

assertEquals(
Optional.of(new URL("http://pubs.acs.org/doi/pdf/10.1021/bk-2006-STYG.ch014")),
Optional.of(new URL("https://pubs.acs.org/doi/pdf/10.1021/bk-2006-STYG.ch014")),
finder.findFullText(entry)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,8 @@ public void searchIdentifierForSlicePaper() throws Exception {

assertEquals(ArXivIdentifier.parse("1405.2249v1"), finder.findIdentifier(sliceTheoremPaper));
}

/**
 * Looking up an empty identifier must produce an empty result.
 */
@Test
public void searchEmptyId() throws Exception {
assertEquals(Optional.empty(), finder.performSearchById(""));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,7 @@ public void testPerformSearchByIdEmptyDOI() throws Exception {

/**
 * A string that is not a valid DOI must produce an empty result.
 */
@Test
public void testPerformSearchByIdInvalidDoi() throws Exception {
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("this.doi.will.fail");
assertEquals(Optional.empty(), fetchedEntry);
assertEquals(Optional.empty(), fetcher.performSearchById("this.doi.will.fail"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package org.jabref.logic.importer.fetcher;

import java.util.Collections;
import java.util.Locale;
import java.util.Optional;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibtexEntryTypes;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -115,7 +117,7 @@ public void findByEntry() throws Exception {

@Test
public void performSearchByIdFindsPaperWithoutTitle() throws Exception {
BibEntry entry = new BibEntry("article");
BibEntry entry = new BibEntry(BibtexEntryTypes.ARTICLE);
entry.setField("author", "Dominik Wujastyk");
entry.setField("doi", "10.1023/a:1003473214310");
entry.setField("issn", "0019-7246");
Expand All @@ -125,4 +127,14 @@ public void performSearchByIdFindsPaperWithoutTitle() throws Exception {

assertEquals(Optional.of(entry), fetcher.performSearchById("10.1023/a:1003473214310"));
}

/**
 * Looking up an empty identifier must produce an empty result.
 */
@Test
public void performSearchByEmptyId() throws Exception {
    Optional<BibEntry> fetched = fetcher.performSearchById("");
    assertEquals(Optional.empty(), fetched);
}

/**
 * Searching with an empty query must produce an empty list.
 */
@Test
public void performSearchByEmptyQuery() throws Exception {
assertEquals(Collections.emptyList(), fetcher.performSearch(""));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,21 @@ public class DBLPFetcherTest {
public void setUp() {
ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class);
when(importFormatPreferences.getFieldContentParserPreferences())
.thenReturn(mock(FieldContentParserPreferences.class));
.thenReturn(mock(FieldContentParserPreferences.class));
dblpFetcher = new DBLPFetcher(importFormatPreferences);
entry = new BibEntry();

entry.setType(BibtexEntryTypes.ARTICLE.getName());
entry.setCiteKey("DBLP:journals/stt/GeigerHL16");
entry.setField(FieldName.TITLE,
"Process Engine Benchmarking with Betsy in the Context of {ISO/IEC} Quality Standards");
"Process Engine Benchmarking with Betsy in the Context of {ISO/IEC} Quality Standards");
entry.setField(FieldName.AUTHOR, "Matthias Geiger and Simon Harrer and J{\\\"{o}}rg Lenhard");
entry.setField(FieldName.JOURNAL, "Softwaretechnik-Trends");
entry.setField(FieldName.VOLUME, "36");
entry.setField(FieldName.NUMBER, "2");
entry.setField(FieldName.YEAR, "2016");
entry.setField(FieldName.URL,
"http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf");
"http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf");
entry.setField("biburl", "https://dblp.org/rec/bib/journals/stt/GeigerHL16");
entry.setField("bibsource", "dblp computer science bibliography, https://dblp.org");

Expand All @@ -64,4 +64,9 @@ public void findSingleEntryUsingComplexOperators() throws FetcherException {
assertEquals(Collections.singletonList(entry), result);
}

/**
 * Searching DBLP with an empty query must produce an empty list.
 */
@Test
public void findNothing() throws Exception {
assertEquals(Collections.emptyList(), dblpFetcher.performSearch(""));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void setUp() {

@Test
void searchByQueryFindsEntry() throws Exception {
BibEntry expected = new BibEntry(BibtexEntryTypes.ARTICLE.getName());
BibEntry expected = new BibEntry(BibtexEntryTypes.ARTICLE);
expected.setField("author", "Wei Wang and Yun He and Tong Li and Jiajun Zhu and Jinzhuo Liu");
expected.setField("doi", "10.1155/2018/5913634");
expected.setField("issn", "1875-919X");
Expand Down Expand Up @@ -65,10 +65,12 @@ void testBibJSONConverter() {
assertEquals(Optional.of("VLSI Design"), bibEntry.getField("journal"));
assertEquals(Optional.of("10.1155/2014/217495"), bibEntry.getField("doi"));
assertEquals(Optional.of("Syed Asad Alam and Oscar Gustafsson"), bibEntry.getField("author"));
assertEquals(
Optional.of(
"Design of Finite Word Length Linear-Phase FIR Filters in the Logarithmic Number System Domain"),
bibEntry.getField("title"));
assertEquals(Optional.of("Design of Finite Word Length Linear-Phase FIR Filters in the Logarithmic Number System Domain"), bibEntry.getField("title"));
assertEquals(Optional.of("2014"), bibEntry.getField("year"));
}

/**
 * Searching with an empty query must produce an empty list.
 */
@Test
public void searchByEmptyQuery() throws Exception {
assertEquals(Collections.emptyList(), fetcher.performSearch(""));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void testPerformSearchById() throws Exception {
entry.setField("institution", "Linköping University, The Institute of Technology");
entry.setCiteKey("Gustafsson260746");
entry.setField("journal",
"IEEE transactions on circuits and systems. 2, Analog and digital signal processing (Print)");
"IEEE transactions on circuits and systems. 2, Analog and digital signal processing (Print)");
entry.setField("number", "11");
entry.setField("pages", "974--978");
entry.setField("title", "Lower bounds for constant multiplication problems");
Expand All @@ -65,4 +65,9 @@ public void testValidIdentifier() {
public void testInvalidIdentifier() {
    // "banana" is not an acceptable identifier for this fetcher
    boolean accepted = fetcher.isValidId("banana");
    assertFalse(accepted);
}

/**
 * Looking up an empty identifier must produce an empty result.
 */
@Test
public void testEmptyId() throws Exception {
assertEquals(Optional.empty(), fetcher.performSearchById(""));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,16 @@ public void searchByIdSuccessfulWithLongISBN() throws FetcherException {
public void authorsAreCorrectlyFormatted() throws Exception {
// Build the entry the fetcher is expected to return, including the "Last, First and ..." author list.
BibEntry bibEntry = new BibEntry();
bibEntry.setType(BiblatexEntryTypes.BOOK);
bibEntry.setField("bibtexkey", "9783642434730");
bibEntry.setField("bibtexkey", "9783662565094");
bibEntry.setField("title", "Fundamentals of Business Process Management");
bibEntry.setField("publisher", "Springer Berlin Heidelberg");
bibEntry.setField("year", "2015");
bibEntry.setField("year", "2018");
bibEntry.setField("author", "Dumas, Marlon and Rosa, Marcello La and Mendling, Jan and Reijers, Hajo A.");
bibEntry.setField("date", "2015-04-12");
bibEntry.setField("ean", "9783642434730");
bibEntry.setField("isbn", "3642434738");
bibEntry.setField("pagetotal", "428");
bibEntry.setField("url", "https://www.ebook.de/de/product/23955263/marlon_dumas_marcello_la_rosa_jan_mendling_hajo_a_reijers_fundamentals_of_business_process_management.html");
bibEntry.setField("date", "2018-03-23");
bibEntry.setField("ean", "9783662565094");
bibEntry.setField("url", "https://www.ebook.de/de/product/33399253/marlon_dumas_marcello_la_rosa_jan_mendling_hajo_a_reijers_fundamentals_of_business_process_management.html");

// Fetch by ISBN and compare the result with the expected entry.
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("3642434738");
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("978-3-662-56509-4");
assertEquals(Optional.of(bibEntry), fetchedEntry);
}

Expand Down
Loading