Skip to content

Commit

Permalink
Implement ottobib fetcher
Browse files Browse the repository at this point in the history
The fetcher does not return BibTeX data as plain text; instead, the data is embedded in an HTML text area.

Fix ISBN tests
Update user agent


Follow up from #5125
  • Loading branch information
Siedlerchr committed Aug 18, 2019
1 parent f96d554 commit 526fcf6
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 64 deletions.
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.DummyFileUpdateMonitor;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
* Fetcher for ISBN using https://www.ottobib.com
*/
public class IsbnViaOttoBibFetcher extends AbstractIsbnFetcher {

private static final String BASE_URL = "https://www.ottobib.com/isbn/";

public IsbnViaOttoBibFetcher(ImportFormatPreferences importFormatPreferences) {
super(importFormatPreferences);
}
Expand All @@ -46,38 +51,20 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc

this.ensureThatIsbnIsValid(identifier);

HttpResponse<String> postResponse;

String BASE_URL = "https://www.ottobib.com/isbn/" + identifier + "/bibtex";

Document html;
try {
postResponse = Unirest.post(BASE_URL)
.asString();
} catch (UnirestException e) {
throw new FetcherException("Could not retrieve data from ottobib.com", e);
html = Jsoup.connect(BASE_URL + identifier + "/bibtex").userAgent(URLDownload.USER_AGENT).get();
} catch (IOException e) {
throw new FetcherException("Could not ", e);
}
if (postResponse.getStatus() != 200) {
throw new FetcherException("Error while retrieving data from ottobib.com: " + postResponse.getBody());
}

List<BibEntry> fetchedEntries;
Element textArea = html.select("textarea").first();
Optional<BibEntry> entry = Optional.empty();
try {
fetchedEntries = getParser().parseEntries(postResponse.getRawBody());
entry = BibtexParser.singleFromString(textArea.text(), importFormatPreferences, new DummyFileUpdateMonitor());
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred", e);
}
if (fetchedEntries.isEmpty()) {
return Optional.empty();
} else if (fetchedEntries.size() > 1) {
LOGGER.info("Fetcher " + getName() + "found more than one result for identifier " + identifier
+ ". We will use the first entry.");
}

BibEntry entry = fetchedEntries.get(0);

// ottobib does not return an ISBN.
entry.setField("isbn", identifier);
return entry;

return Optional.of(entry);
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/net/URLDownload.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
*/
public class URLDownload {

public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0";
public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0";

private static final Logger LOGGER = LoggerFactory.getLogger(URLDownload.class);
private final URL source;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public void setUp() {
bibEntry.setField(StandardField.AUTHOR, "Joshua Bloch");
bibEntry.setField(StandardField.ISBN, "978-0321356680");
bibEntry.setField(StandardField.URL,
"https://www.amazon.com/Effective-Java-2nd-Joshua-Bloch/dp/0321356683?SubscriptionId=AKIAIOBINVZYXZQZ2U3A&tag=chimbori05-20&linkCode=xm2&camp=2025&creative=165953&creativeASIN=0321356683");
"https://www.amazon.com/Effective-Java-2nd-Joshua-Bloch/dp/0321356683?SubscriptionId=AKIAIOBINVZYXZQZ2U3A&tag=chimbori05-20&linkCode=xm2&camp=2025&creative=165953&creativeASIN=0321356683");

fetcher = new IsbnViaChimboriFetcher(mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS));
}
Expand Down Expand Up @@ -78,7 +78,7 @@ public void authorsAreCorrectlyFormatted() throws Exception {
bibEntry.setField(StandardField.AUTHOR, "Marlon Dumas and Marcello La Rosa and Jan Mendling and Hajo A. Reijers");
bibEntry.setField(StandardField.ISBN, "3642434738");
bibEntry.setField(StandardField.URL,
"https://www.amazon.com/Fundamentals-Business-Process-Management-Marlon/dp/3642434738?SubscriptionId=AKIAIOBINVZYXZQZ2U3A&tag=chimbori05-20&linkCode=xm2&camp=2025&creative=165953&creativeASIN=3642434738");
"https://www.amazon.com/Fundamentals-Business-Process-Management-Marlon/dp/3642434738?SubscriptionId=AKIAIOBINVZYXZQZ2U3A&tag=chimbori05-20&linkCode=xm2&camp=2025&creative=165953&creativeASIN=3642434738");

Optional<BibEntry> fetchedEntry = fetcher.performSearchById("3642434738");
assertEquals(Optional.of(bibEntry), fetchedEntry);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public void setUp() {
bibEntry.setField(StandardField.PUBLISHER, "Addison Wesley");
bibEntry.setField(StandardField.YEAR, "2018");
bibEntry.setField(StandardField.AUTHOR, "Bloch, Joshua");
bibEntry.setField(StandardField.DATE, "2018-01-11");
bibEntry.setField(StandardField.DATE, "2018-01-01");
bibEntry.setField(new UnknownField("ean"), "9780134685991");
bibEntry.setField(StandardField.ISBN, "0134685997");
bibEntry.setField(StandardField.URL, "https://www.ebook.de/de/product/28983211/joshua_bloch_effective_java.html");
Expand Down Expand Up @@ -68,26 +68,29 @@ public void searchByIdSuccessfulWithLongISBN() throws FetcherException {
public void authorsAreCorrectlyFormatted() throws Exception {
BibEntry bibEntry = new BibEntry();
bibEntry.setType(StandardEntryType.Book);
bibEntry.setCiteKey("9783662565094");
bibEntry.setCiteKey("9783662585856");
bibEntry.setField(StandardField.TITLE, "Fundamentals of Business Process Management");
bibEntry.setField(StandardField.PUBLISHER, "Springer Berlin Heidelberg");
bibEntry.setField(StandardField.YEAR, "2018");
bibEntry.setField(StandardField.YEAR, "2019");
bibEntry.setField(StandardField.AUTHOR, "Dumas, Marlon and Rosa, Marcello La and Mendling, Jan and Reijers, Hajo A.");
bibEntry.setField(StandardField.DATE, "2018-03-23");
bibEntry.setField(new UnknownField("ean"), "9783662565094");
bibEntry.setField(StandardField.URL, "https://www.ebook.de/de/product/33399253/marlon_dumas_marcello_la_rosa_jan_mendling_hajo_a_reijers_fundamentals_of_business_process_management.html");
bibEntry.setField(StandardField.DATE, "2019-02-01");
bibEntry.setField(StandardField.PAGETOTAL, "560");
bibEntry.setField(new UnknownField("ean"), "9783662585856");
bibEntry.setField(StandardField.ISBN, "3662585855");
bibEntry.setField(StandardField.URL, "https://www.ebook.de/de/product/35805105/marlon_dumas_marcello_la_rosa_jan_mendling_hajo_a_reijers_fundamentals_of_business_process_management.html");

Optional<BibEntry> fetchedEntry = fetcher.performSearchById("978-3-662-56509-4");
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("3662585855");
assertEquals(Optional.of(bibEntry), fetchedEntry);
}

/**
* This test searches for a valid ISBN. See https://www.amazon.de/dp/3728128155/?tag=jabref-21 However, this ISBN is
* not available on ebook.de. The fetcher should return nothing rather than throwing an exeption.
* not available on ebook.de. The fetcher should return nothing rather than throwing an exception.
*/
@Test
public void searchForValidButNotFoundISBN() throws Exception {
// Query the fetcher fixture with the ISBN that the Javadoc describes as valid but unavailable on ebook.de.
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("3728128155");
// A miss must surface as an empty Optional; an exception thrown by the fetcher would fail the test.
assertEquals(Optional.empty(), fetchedEntry);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BiblatexEntryTypes;
import org.jabref.model.entry.StandardEntryType;
import org.jabref.model.entry.field.StandardField;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
Expand All @@ -21,14 +22,14 @@ public class IsbnViaOttoBibFetcherTest extends AbstractIsbnFetcherTest {
// Builds the expected entry used by the ISBN tests and creates the fetcher under test.
// NOTE(review): both string-keyed setField calls ("bibtexkey", "title", ...) and StandardField-keyed
// calls appear in this span; this looks like removed and added diff lines rendered together.
// Confirm which set is live before relying on the values below.
@BeforeEach
public void setUp() {
bibEntry = new BibEntry();
bibEntry.setType(BiblatexEntryTypes.BOOK);
bibEntry.setField("bibtexkey", "9782819502746");
bibEntry.setField("title", "Les mots du passé : roman");
bibEntry.setField("publisher", "́Éd. les Nouveaux auteurs");
bibEntry.setField("year", "2012");
bibEntry.setField("author", "Denis");
bibEntry.setField("isbn", "978-2-8195-02746");
bibEntry.setField("url", "https://www.ottobib.com/isbn/9782819502746/bibtex");
// StandardField-keyed variant: expected entry for "Effective Java" with cite key "bloch2008effective".
bibEntry.setType(StandardEntryType.Book);
bibEntry.setCiteKey("bloch2008effective");
bibEntry.setField(StandardField.TITLE, "Effective Java");
bibEntry.setField(StandardField.PUBLISHER, "Addison-Wesley");
bibEntry.setField(StandardField.YEAR, "2008");
bibEntry.setField(StandardField.AUTHOR, "Bloch, Joshua");
bibEntry.setField(StandardField.ISBN, "9780321356680");
bibEntry.setField(StandardField.ADDRESS, "Upper Saddle River, NJ");

// The fetcher receives a Mockito deep-stub mock instead of real import preferences.
fetcher = new IsbnViaOttoBibFetcher(mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS));
}
Expand All @@ -49,34 +50,50 @@ public void testHelpPage() {
@Override
public void searchByIdSuccessfulWithShortISBN() throws FetcherException {
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("0321356683");
bibEntry.setField("bibtexkey", "0321356683");
bibEntry.setField("isbn", "0321356683");
bibEntry.setField(StandardField.ISBN, "0321356683");
assertEquals(Optional.of(bibEntry), fetchedEntry);
}

// Looks up the 13-digit ISBN 9780321356680 and compares the result with the bibEntry fixture.
@Test
@Override
public void searchByIdSuccessfulWithLongISBN() throws FetcherException {
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("9780321356680");
// NOTE(review): the two string-keyed setField calls and the StandardField.ISBN call look like
// removed and added diff lines rendered together; confirm which are live.
bibEntry.setField("bibtexkey", "9780321356680");
bibEntry.setField("isbn", "9780321356680");
// The expected ISBN field is set to the same identifier that was passed to the fetcher.
bibEntry.setField(StandardField.ISBN, "9780321356680");
assertEquals(Optional.of(bibEntry), fetchedEntry);
}

// Builds a method-local expected entry and compares it with the entry returned by the fetcher.
// NOTE(review): this span declares fetchedEntry twice and mixes string-keyed with
// StandardField-keyed setters, so it appears to show removed and added diff lines together.
// Confirm which lines are live.
@Test
@Override
public void authorsAreCorrectlyFormatted() throws Exception {
// Declared with a type here, so this is a local entry rather than the one assigned in setUp().
BibEntry bibEntry = new BibEntry();
bibEntry.setType(BiblatexEntryTypes.BOOK);
bibEntry.setField("bibtexkey", "9782819502746");
bibEntry.setField("title", "Les mots du passé : roman");
bibEntry.setField("publisher", "́Éd. les Nouveaux auteurs");
bibEntry.setField("year", "2012");
bibEntry.setField("author", "Denis");
bibEntry.setField("isbn", "978-2-8195-02746");
bibEntry.setField("url", "https://www.ottobib.com/isbn/9782819502746/bibtex");

Optional<BibEntry> fetchedEntry = fetcher.performSearchById("9782819502746");
// StandardField-keyed variant: expected entry with cite key "dumas2018fundamentals".
bibEntry.setType(StandardEntryType.Book);
bibEntry.setCiteKey("dumas2018fundamentals");
bibEntry.setField(StandardField.TITLE, "Fundamentals of business process management");
bibEntry.setField(StandardField.PUBLISHER, "Springer");
// Only a single author ("Dumas, Marlon") is expected in this variant.
bibEntry.setField(StandardField.AUTHOR, "Dumas, Marlon");
bibEntry.setField(StandardField.ADDRESS, "Berlin, Germany");
// The expected ISBN is the unhyphenated form of the hyphenated identifier queried below.
bibEntry.setField(StandardField.ISBN, "9783662565094");
bibEntry.setField(StandardField.YEAR, "2018");

Optional<BibEntry> fetchedEntry = fetcher.performSearchById("978-3-662-56509-4");
assertEquals(Optional.of(bibEntry), fetchedEntry);
}

/**
 * Fetches the hyphenated ISBN 978-2-8195-02746 and compares the result with a hand-built expected
 * entry for "Les mots du passé : roman".
 * NOTE(review): the method name suggests this ISBN is missing from the ebook.de and Chimbori
 * fetchers; nothing in this test checks that - confirm.
 */
@Test
public void testISBNNotAvaiableOnEbookDeOrChimbori() throws Exception {
// Assigned without a type declaration, so this overwrites the bibEntry built in setUp().
bibEntry = new BibEntry();
bibEntry.setType(StandardEntryType.Book);
bibEntry.setCiteKey("denis2012les");
bibEntry.setField(StandardField.TITLE, "Les mots du passé : roman");
bibEntry.setField(StandardField.PUBLISHER, "Éd. les Nouveaux auteurs");
bibEntry.setField(StandardField.ADDRESS, "Paris");
bibEntry.setField(StandardField.YEAR, "2012");
// The expected author value is literally "Denis, " including the trailing comma and space.
bibEntry.setField(StandardField.AUTHOR, "Denis, ");
// The expected ISBN is unhyphenated although the query below passes a hyphenated identifier.
bibEntry.setField(StandardField.ISBN, "9782819502746");

Optional<BibEntry> fetchedEntry = fetcher.performSearchById("978-2-8195-02746");
assertEquals(Optional.of(bibEntry), fetchedEntry);

}

}

0 comments on commit 526fcf6

Please sign in to comment.