Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add APS Fetcher (refactored) #6143

Merged
merged 13 commits into from
Mar 18, 2020
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- Filenames of external files can no longer contain curly braces. [#5926](https://github.com/JabRef/jabref/pull/5926)
- We made the filters more easily accessible in the integrity check dialog. [#5955](https://github.com/JabRef/jabref/pull/5955)
- We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
- We reimplemented and improved the dialog "Customize entry types" [#4719](https://github.com/JabRef/jabref/issues/4719)
koppor marked this conversation as resolved.
Show resolved Hide resolved
- We added an [American Physical Society](https://journals.aps.org/) fetcher. [#818](https://github.com/JabRef/jabref/issues/818)

### Fixed

Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.TreeSet;

import org.jabref.logic.importer.fetcher.ACS;
import org.jabref.logic.importer.fetcher.ApsFetcher;
import org.jabref.logic.importer.fetcher.ArXiv;
import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
import org.jabref.logic.importer.fetcher.CiteSeer;
Expand Down Expand Up @@ -159,6 +160,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
fetchers.add(new ACS());
fetchers.add(new ArXiv(importFormatPreferences));
fetchers.add(new IEEE(importFormatPreferences));
fetchers.add(new ApsFetcher());
// Meta search
fetchers.add(new GoogleScholar(importFormatPreferences));
fetchers.add(new OpenAccessDoi());
Expand Down
93 changes: 93 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;

import kong.unirest.Unirest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the <a
* href="https://harvest.aps.org/docs/harvest-api">API</a>, although it isn't currently used.
*/
public class ApsFetcher implements FulltextFetcher {

private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class);

// The actual API needs either an API key or a header. This is a workaround.
private static final String DOI_URL = "https://www.doi.org/";
private static final String PDF_URL = "https://journals.aps.org/prl/pdf/";

@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
Objects.requireNonNull(entry);

Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);

if (!doi.isPresent()) {
return Optional.empty();
}

Optional<String> id = getId(doi.get().getDOI());

if (id.isPresent()) {

String pdfRequestUrl = PDF_URL + id.get();
int code = Unirest.head(pdfRequestUrl).asJson().getStatus();

if (code == 200) {
LOGGER.info("Fulltext PDF found @ APS.");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess debug level suffices

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's info at the other fetchers as well

try {
return Optional.of(new URL(pdfRequestUrl));
} catch (MalformedURLException e) {
LOGGER.warn("APS returned malformed URL, cannot find PDF.");
}
}
}
return Optional.empty();
}

@Override
public TrustLevel getTrustLevel() {
return TrustLevel.PUBLISHER;
}

/**
* Convert a DOI into an appropriate APS id.
*
* @param doi A case insensitive DOI
* @return A DOI cased as APS likes it
*/
private Optional<String> getId(String doi) {
// DOI is not case sensitive, but the id for the PDF URL is,
// so we follow DOI.org redirects to get the proper id.
// https://stackoverflow.com/a/5270162/1729441

String doiRequest = DOI_URL + doi;

URLConnection con;
try {
con = new URL(doiRequest).openConnection();
con.connect();
con.getInputStream();
String[] urlParts = con.getURL().toString().split("abstract/");
if (urlParts.length == 2) {
return Optional.of(urlParts[1]);
}

} catch (IOException e) {
LOGGER.warn("Error connecting to APS", e);
}
return Optional.empty();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.jabref.logic.importer.fetcher;

import java.net.URL;
import java.util.Optional;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

@FetcherTest
class ApsFetcherTest {

private ApsFetcher finder;

@BeforeEach
void setUp() {
finder = new ApsFetcher();
}

@Test
void findFullTextFromDoi() throws Exception {
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.116.061102");
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102")), finder.findFullText(entry));
}

@Test
void findFullTextFromLowercaseDoi() throws Exception {
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/physrevlett.124.029002");
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002")), finder.findFullText(entry));
}

@Test
void notFindFullTextForUnauthorized() throws Exception {
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.89.127401");
assertEquals(Optional.empty(), finder.findFullText(entry));
}

@Test
void notFindFullTextForUnknownEntry() throws Exception {
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002");
assertEquals(Optional.empty(), finder.findFullText(entry));
}
}