Skip to content

Commit

Permalink
Fix inspire fetcher
Browse files Browse the repository at this point in the history
Use application/x-bibtex header
Fixes #6229
  • Loading branch information
Siedlerchr committed Apr 6, 2020
1 parent 8296c3c commit bac8bcf
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 53 deletions.
71 changes: 39 additions & 32 deletions src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
Original file line number Diff line number Diff line change
@@ -1,49 +1,46 @@
package org.jabref.logic.importer.fetcher;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.util.OS;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
* Fetches data from the INSPIRE database.
*
* @implNote We just use the normal search interface since it provides direct BibTeX export while the API (http://inspirehep.net/info/hep/api) currently only supports JSON and XML
*/
public class INSPIREFetcher implements SearchBasedParserFetcher {

private static final String INSPIRE_HOST = "https://inspirehep.net/search";
private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/";

private final ImportFormatPreferences preferences;
private final ImportFormatPreferences importFormatPreferences;

public INSPIREFetcher(ImportFormatPreferences preferences) {
this.preferences = preferences;
this.importFormatPreferences = preferences;
}

@Override
Expand All @@ -59,33 +56,38 @@ public Optional<HelpFile> getHelpPage() {
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
uriBuilder.addParameter("p", query); // Query
//uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
uriBuilder.addParameter("of", "hx"); // BibTeX format
uriBuilder.addParameter("q", query); // Query
return uriBuilder.build().toURL();
}

@Override
public Parser getParser() {
// Inspire returns the BibTeX result embedded in HTML
// So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
return inputStream -> {
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
public List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try {
URLDownload download = new URLDownload(getURLForQuery(query));
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);

List<BibEntry> entries = new ArrayList<>();
try (InputStream stream = download.asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

Document doc = Jsoup.parse(response);
Elements preElements = doc.getElementsByTag("pre");
// Post-cleanup
fetchedEntries.forEach(this::doPostCleanup);

for (Element elem : preElements) {
//We have to use a new instance here, because otherwise only the first entry gets parsed
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
entries.addAll(entry);
return fetchedEntries;
}
return entries;
};

} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
// TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred", e);
}

}

@Override
Expand All @@ -96,4 +98,9 @@ public void doPostCleanup(BibEntry entry) {
// Remove braces around content of "title" field
new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);
}

/**
 * Supplies the parser used for the fetched response: a new {@link BibtexParser}
 * built from this fetcher's import format preferences and a dummy file update monitor.
 *
 * @return a freshly created BibTeX parser
 */
@Override
public Parser getParser() {
    DummyFileUpdateMonitor fileMonitor = new DummyFileUpdateMonitor();
    Parser bibtexParser = new BibtexParser(importFormatPreferences, fileMonitor);
    return bibtexParser;
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/openoffice/OOUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ public static void insertTextAtCurrentLocation(XText text, XTextCursor cursor, S
} */
if (formatting.contains(Formatting.SUBSCRIPT)) {
xCursorProps.setPropertyValue(CHAR_ESCAPEMENT,
(byte) -101);
(byte) -33);
xCursorProps.setPropertyValue(CHAR_ESCAPEMENT_HEIGHT,
(byte) 58);
} else if (formatting.contains(Formatting.SUPERSCRIPT)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.importer.fetcher;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.bibtex.FieldContentFormatterPreferences;
Expand Down Expand Up @@ -32,24 +32,6 @@ void setUp() {

@Test
void searchByQueryFindsEntry() throws Exception {
BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
phd.setCiteKey("Diez:2019pkg");
phd.setField(StandardField.AUTHOR, "Diez, Tobias");
phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
phd.setField(StandardField.YEAR, "2019");
phd.setField(StandardField.EPRINT, "1909.00744");
phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
phd.setField(StandardField.PRIMARYCLASS, "math.SG");

BibEntry article = new BibEntry(StandardEntryType.Article);
article.setCiteKey("Diez:2018gjz");
article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
article.setField(StandardField.YEAR, "2018");
article.setField(StandardField.EPRINT, "1812.04707");
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
article.setField(StandardField.PRIMARYCLASS, "math-ph");

BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
master.setCiteKey("Diez:2014ppa");
Expand All @@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception {

List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");

assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
assertEquals(Collections.singletonList(master), fetchedEntries);
}

/**
 * Searches by the eprint identifier "hep-ph/9802379" and checks that exactly one
 * entry, equal to the expected article, comes back.
 */
@Test
public void searchByIdentifierFindsEntry() throws Exception {
    // Expected record; its eprint field matches the identifier used as the query below
    BibEntry expected = new BibEntry(StandardEntryType.Article);
    expected.setCiteKey("Melnikov:1998pr");
    expected.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander");
    expected.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy");
    expected.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4");
    expected.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B");
    expected.setField(StandardField.PAGES, "59--72");
    expected.setField(StandardField.VOLUME, "528");
    expected.setField(StandardField.YEAR, "1998");
    expected.setField(StandardField.EPRINT, "hep-ph/9802379");
    expected.setField(StandardField.ARCHIVEPREFIX, "arXiv");
    expected.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10");
    List<BibEntry> expectedEntries = Collections.singletonList(expected);

    List<BibEntry> actualEntries = fetcher.performSearch("hep-ph/9802379");

    assertEquals(expectedEntries, actualEntries);
}
}

0 comments on commit bac8bcf

Please sign in to comment.