Skip to content

Commit

Permalink
Add APS Fetcher (refactored) (JabRef#6143)
Browse files Browse the repository at this point in the history
* Add APS fetcher

* Fix case sensitivity bug

* Refactor ApsFetcher

* Add note about APS fetcher

* Refactor findFulltext()

* Refactor getId()

* Parameterize ApsFetcherTest

* Add link to APS changelog entry

* Refactor APS Fetcher

* make separate tests

Co-authored-by: August Janse <augustj@kth.se>
  • Loading branch information
Siedlerchr and augustjanse authored Mar 18, 2020
1 parent edec608 commit 99183e1
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- Filenames of external files can no longer contain curly braces. [#5926](https://github.com/JabRef/jabref/pull/5926)
- We made the filters more easily accessible in the integrity check dialog. [#5955](https://github.com/JabRef/jabref/pull/5955)
- We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
- We added an [American Physical Society](https://journals.aps.org/) fetcher. [#818](https://github.com/JabRef/jabref/issues/818)

### Fixed

Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.TreeSet;

import org.jabref.logic.importer.fetcher.ACS;
import org.jabref.logic.importer.fetcher.ApsFetcher;
import org.jabref.logic.importer.fetcher.ArXiv;
import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
import org.jabref.logic.importer.fetcher.CiteSeer;
Expand Down Expand Up @@ -159,6 +160,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
fetchers.add(new ACS());
fetchers.add(new ArXiv(importFormatPreferences));
fetchers.add(new IEEE(importFormatPreferences));
fetchers.add(new ApsFetcher());
// Meta search
fetchers.add(new GoogleScholar(importFormatPreferences));
fetchers.add(new OpenAccessDoi());
Expand Down
93 changes: 93 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;

import kong.unirest.Unirest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the <a
 * href="https://harvest.aps.org/docs/harvest-api">API</a>, although it isn't currently used.
 * <p>
 * The lookup is done in two steps: the DOI of the entry is resolved through doi.org to obtain the id
 * in the casing APS uses, and then a HEAD request checks whether the PDF behind that id is reachable.
 */
public class ApsFetcher implements FulltextFetcher {

    private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class);

    // The actual API needs either an API key or a header. This is a workaround.
    private static final String DOI_URL = "https://www.doi.org/";
    // NOTE(review): the path is fixed to the "prl" journal; DOIs resolving to other APS journals
    // presumably do not yield a valid PDF URL here - confirm before relying on it.
    private static final String PDF_URL = "https://journals.aps.org/prl/pdf/";

    /**
     * Tries to locate the full text PDF of the given entry at APS.
     *
     * @param entry the entry to look up; must not be null and needs a parsable DOI field to produce a result
     * @return the PDF URL if the HEAD request for it answers with status 200, otherwise an empty Optional
     * @throws IOException declared by the {@link FulltextFetcher} contract
     */
    @Override
    public Optional<URL> findFullText(BibEntry entry) throws IOException {
        Objects.requireNonNull(entry);

        Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
        if (!doi.isPresent()) {
            return Optional.empty();
        }

        Optional<String> id = getId(doi.get().getDOI());
        if (!id.isPresent()) {
            return Optional.empty();
        }

        String pdfRequestUrl = PDF_URL + id.get();
        // Only the status code matters: anything other than 200 is treated as "no PDF available".
        int code = Unirest.head(pdfRequestUrl).asJson().getStatus();
        if (code != 200) {
            return Optional.empty();
        }

        LOGGER.info("Fulltext PDF found @ APS.");
        try {
            return Optional.of(new URL(pdfRequestUrl));
        } catch (MalformedURLException e) {
            // Keep the cause in the log so the offending URL can be diagnosed.
            LOGGER.warn("APS returned malformed URL, cannot find PDF.", e);
            return Optional.empty();
        }
    }

    @Override
    public TrustLevel getTrustLevel() {
        return TrustLevel.PUBLISHER;
    }

    /**
     * Convert a DOI into an appropriate APS id.
     *
     * @param doi A case insensitive DOI
     * @return A DOI cased as APS likes it, or an empty Optional if the DOI could not be resolved to an
     *         APS abstract page
     */
    private Optional<String> getId(String doi) {
        // DOI is not case sensitive, but the id for the PDF URL is,
        // so we follow DOI.org redirects to get the proper id.
        // https://stackoverflow.com/a/5270162/1729441

        String doiRequest = DOI_URL + doi;

        try {
            URLConnection con = new URL(doiRequest).openConnection();
            con.connect();
            // Requesting the stream makes the connection follow the redirects, so getURL() reports
            // the final location. The content itself is not needed; try-with-resources makes sure
            // the stream (and with it the underlying connection) is released instead of leaked.
            try (InputStream ignored = con.getInputStream()) {
                String[] urlParts = con.getURL().toString().split("abstract/");
                if (urlParts.length == 2) {
                    return Optional.of(urlParts[1]);
                }
            }
        } catch (IOException e) {
            LOGGER.warn("Error connecting to APS", e);
        }
        return Optional.empty();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.jabref.logic.importer.fetcher;

import java.net.URL;
import java.util.Optional;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Fetcher tests for {@link ApsFetcher}: each case builds an entry that only carries a DOI and
 * compares the result of the full text lookup against the expected outcome.
 */
@FetcherTest
class ApsFetcherTest {

    private ApsFetcher fetcher;

    @BeforeEach
    void setUp() {
        fetcher = new ApsFetcher();
    }

    @Test
    void findFullTextFromDoi() throws Exception {
        Optional<URL> expected = Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102"));

        Optional<URL> actual = fetcher.findFullText(entryWithDoi("10.1103/PhysRevLett.116.061102"));

        assertEquals(expected, actual);
    }

    @Test
    void findFullTextFromLowercaseDoi() throws Exception {
        // The DOI is given in lower case; the expected URL carries the mixed-case id.
        Optional<URL> expected = Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002"));

        Optional<URL> actual = fetcher.findFullText(entryWithDoi("10.1103/physrevlett.124.029002"));

        assertEquals(expected, actual);
    }

    @Test
    void notFindFullTextForUnauthorized() throws Exception {
        Optional<URL> actual = fetcher.findFullText(entryWithDoi("10.1103/PhysRevLett.89.127401"));

        assertEquals(Optional.empty(), actual);
    }

    @Test
    void notFindFullTextForUnknownEntry() throws Exception {
        Optional<URL> actual = fetcher.findFullText(entryWithDoi("10.1016/j.aasri.2014.0559.002"));

        assertEquals(Optional.empty(), actual);
    }

    /**
     * Creates a fresh entry whose only field is the given DOI.
     */
    private static BibEntry entryWithDoi(String doi) {
        return new BibEntry().withField(StandardField.DOI, doi);
    }
}

0 comments on commit 99183e1

Please sign in to comment.