Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inspire fetcher #6258

Merged
merged 6 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue with inconsistent capitalization of file extensions when downloading files. [#6115](https://github.com/JabRef/jabref/issues/6115)
- We fixed the display of language and encoding in the preferences dialog. [#6130](https://github.com/JabRef/jabref/pull/6130)
- We fixed an issue where search full-text documents downloaded files with same name, overwriting existing files. [#6174](https://github.com/JabRef/jabref/pull/6174)
- We fixed an issue where when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
- We fixed an issue when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
- We fixed an issue where custom jstyles for Open/LibreOffice were not saved correctly. [#6170](https://github.com/JabRef/jabref/issues/6170)
- We fixed an issue where the INSPIRE fetcher was no longer working. [#6229](https://github.com/JabRef/jabref/issues/6229)


### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,24 @@ default void doPostCleanup(BibEntry entry) {
// Do nothing by default
}

/**
 * Creates the {@link URLDownload} that is used to retrieve the results for the given query.
 * Override this method if additional headers have to be sent with the download request.
 *
 * @param query The search query
 * @return a download object for the URL obtained from {@code getURLForQuery(query)}
 * @throws MalformedURLException declared because {@code getURLForQuery} may throw it
 * @throws FetcherException      declared because {@code getURLForQuery} may throw it
 * @throws URISyntaxException    declared because {@code getURLForQuery} may throw it
 */
default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
    URL queryUrl = getURLForQuery(query);
    return new URLDownload(queryUrl);
}

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try (InputStream stream = new URLDownload(getURLForQuery(query)).asInputStream()) {
try (InputStream stream = getUrlDownload(query).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

// Post-cleanup
Expand Down
52 changes: 15 additions & 37 deletions src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
package org.jabref.logic.importer.fetcher;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
Expand All @@ -18,32 +13,28 @@
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.util.OS;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
* Fetches data from the INSPIRE database.
*
* @implNote We query the INSPIRE API (https://inspirehep.net/api/literature/) and request BibTeX output via the "Accept" header, so the response can be handed directly to the BibTeX parser
*/
public class INSPIREFetcher implements SearchBasedParserFetcher {

private static final String INSPIRE_HOST = "https://inspirehep.net/search";
private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please include the link to the documentation of the new API as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is not yet any documentation available. I contacted the INSPIRE HEP team.


private final ImportFormatPreferences preferences;
private final ImportFormatPreferences importFormatPreferences;

public INSPIREFetcher(ImportFormatPreferences preferences) {
this.preferences = preferences;
this.importFormatPreferences = preferences;
}

@Override
Expand All @@ -59,33 +50,15 @@ public Optional<HelpFile> getHelpPage() {
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
uriBuilder.addParameter("p", query); // Query
// uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
uriBuilder.addParameter("of", "hx"); // BibTeX format
uriBuilder.addParameter("q", query); // Query
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there no paging support (start + size) for the new interface?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JSON API has support for it, but not when it is called with the BibTeX header.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On my end, &size=10&page=2 seems to work fine as of today; I am not sure about a week ago.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, this seems to work. I will create a follow-up PR.

return uriBuilder.build().toURL();
}

@Override
public Parser getParser() {
// Inspire returns the BibTeX result embedded in HTML
// So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
return inputStream -> {
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));

List<BibEntry> entries = new ArrayList<>();

Document doc = Jsoup.parse(response);
Elements preElements = doc.getElementsByTag("pre");

for (Element elem : preElements) {
// We have to use a new instance here, because otherwise only the first entry gets parsed
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
entries.addAll(entry);
}
return entries;
};
public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
// Build the download for the URL that getURLForQuery produces for this query
URLDownload download = new URLDownload(getURLForQuery(query));
// Ask for the result in BibTeX format by sending the BibTeX media type in the "Accept" header
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
return download;
}

@Override
Expand All @@ -96,4 +69,9 @@ public void doPostCleanup(BibEntry entry) {
// Remove braces around content of "title" field
new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);
}

/**
 * Supplies the parser for the fetched data: a {@link BibtexParser} set up with this
 * fetcher's import format preferences and a {@link DummyFileUpdateMonitor}.
 */
@Override
public Parser getParser() {
    DummyFileUpdateMonitor fileMonitor = new DummyFileUpdateMonitor();
    return new BibtexParser(importFormatPreferences, fileMonitor);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.importer.fetcher;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.bibtex.FieldContentFormatterPreferences;
Expand Down Expand Up @@ -32,24 +32,6 @@ void setUp() {

@Test
void searchByQueryFindsEntry() throws Exception {
BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
phd.setCiteKey("Diez:2019pkg");
phd.setField(StandardField.AUTHOR, "Diez, Tobias");
phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
phd.setField(StandardField.YEAR, "2019");
phd.setField(StandardField.EPRINT, "1909.00744");
phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
phd.setField(StandardField.PRIMARYCLASS, "math.SG");

BibEntry article = new BibEntry(StandardEntryType.Article);
article.setCiteKey("Diez:2018gjz");
article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
article.setField(StandardField.YEAR, "2018");
article.setField(StandardField.EPRINT, "1812.04707");
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
article.setField(StandardField.PRIMARYCLASS, "math-ph");

BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
master.setCiteKey("Diez:2014ppa");
Expand All @@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception {

List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");

assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
assertEquals(Collections.singletonList(master), fetchedEntries);
}

/**
 * Searches for the identifier "hep-ph/9802379" and expects the fetcher to return
 * exactly one article entry with the fields listed below.
 */
@Test
public void searchByIdentifierFindsEntry() throws Exception {
    BibEntry expected = new BibEntry(StandardEntryType.Article);
    expected.setCiteKey("Melnikov:1998pr");
    expected.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander");
    expected.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy");
    expected.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4");
    expected.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B");
    expected.setField(StandardField.PAGES, "59--72");
    expected.setField(StandardField.VOLUME, "528");
    expected.setField(StandardField.YEAR, "1998");
    expected.setField(StandardField.EPRINT, "hep-ph/9802379");
    expected.setField(StandardField.ARCHIVEPREFIX, "arXiv");
    expected.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10");

    // Run the search first, then build the expected single-element list for comparison
    List<BibEntry> actual = fetcher.performSearch("hep-ph/9802379");
    List<BibEntry> expectedEntries = Collections.singletonList(expected);

    assertEquals(expectedEntries, actual);
}
}