-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New fetcher #1929
New fetcher #1929
Changes from 1 commit
a90bc21
4fc8551
443f50f
1b57e28
d39fc73
adfe299
d3b62be
9539d47
8d98ad9
9816aa7
528b197
cde8b17
0655709
f9c2ce6
db93bbb
3979146
e1002d3
6737da0
a1f98e8
61579c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,14 +26,16 @@ | |
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import net.sf.jabref.logic.formatter.Formatter; | ||
import net.sf.jabref.model.entry.BibEntry; | ||
|
||
import org.jsoup.helper.StringUtil; | ||
|
||
/** | ||
* Provides a convenient interface for search-based fetchers, which follow the usual two-step procedure: | ||
* Provides a convenient interface for search-based fetchers, which follow the usual three-step procedure: | ||
* 1. Open a URL based on the search query | ||
* 2. Parse the response to get a list of {@link BibEntry} | ||
* 3. Apply some {@link Formatter} | ||
*/ | ||
public interface SearchBasedParserFetcher extends SearchBasedFetcher { | ||
|
||
|
@@ -48,14 +50,36 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher { | |
*/ | ||
Parser getParser(); | ||
|
||
/** | ||
* Performs a cleanup of a single fetched entry. | ||
* | ||
* {@code performSearch} calls this method once for every entry returned by the parser. | ||
* | ||
* Only systematic errors of the fetcher should be corrected here | ||
* (i.e. information that is consistently placed in the wrong field or delivered in the wrong format), | ||
* not cosmetic issues that depend on the user's taste (for example, LaTeX code vs. HTML in the abstract). | ||
* | ||
* Try to reuse an existing {@link Formatter} for the cleanup. For example, | ||
* {@code new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);} | ||
* | ||
* By default, no cleanup is done. | ||
* | ||
* @param entry the entry to be cleaned up | ||
*/ | ||
default void doPostCleanup(BibEntry entry) { | ||
// Do nothing: implementations override this only when their source needs systematic corrections | ||
} | ||
|
||
@Override | ||
default List<BibEntry> performSearch(String query) throws FetcherException { | ||
if (StringUtil.isBlank(query)) { | ||
return Collections.emptyList(); | ||
} | ||
|
||
try (InputStream stream = new BufferedInputStream(getQueryURL(query).openStream())) { | ||
return getParser().parseEntries(stream); | ||
List<BibEntry> fetchedEntries = getParser().parseEntries(stream); | ||
|
||
// Post-cleanup | ||
fetchedEntries.forEach(this::doPostCleanup); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is the implementor of |
||
|
||
return fetchedEntries; | ||
} catch (URISyntaxException e) { | ||
throw new FetcherException("Search URI is malformed", e); | ||
} catch (IOException e) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (C) 2003-2016 JabRef contributors. | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 2 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along | ||
* with this program; if not, write to the Free Software Foundation, Inc., | ||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
*/ | ||
|
||
package net.sf.jabref.logic.importer.fetcher; | ||
|
||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.Objects; | ||
|
||
import net.sf.jabref.logic.cleanup.FieldFormatterCleanup; | ||
import net.sf.jabref.logic.formatter.bibtexfields.ClearFormatter; | ||
import net.sf.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter; | ||
import net.sf.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; | ||
import net.sf.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter; | ||
import net.sf.jabref.logic.help.HelpFile; | ||
import net.sf.jabref.logic.importer.FetcherException; | ||
import net.sf.jabref.logic.importer.ImportFormatPreferences; | ||
import net.sf.jabref.logic.importer.Parser; | ||
import net.sf.jabref.logic.importer.SearchBasedParserFetcher; | ||
import net.sf.jabref.logic.importer.fileformat.BibtexParser; | ||
import net.sf.jabref.model.entry.BibEntry; | ||
import net.sf.jabref.model.entry.FieldName; | ||
|
||
import org.apache.http.client.utils.URIBuilder; | ||
|
||
/** | ||
* Fetches data from the SAO/NASA Astrophysics Data System (http://www.adsabs.harvard.edu/) | ||
* | ||
* Search query-based: http://adsabs.harvard.edu/basic_search.html | ||
*/ | ||
public class AstrophysicsDataSystem implements SearchBasedParserFetcher { | ||
|
||
// Base URL that getQueryURL(String) extends with the search parameters. | ||
// Declared final so the shared constant cannot be reassigned at runtime. | ||
private static final String API_URL = "http://adsabs.harvard.edu/cgi-bin/nph-basic_connect"; | ||
private final ImportFormatPreferences preferences; | ||
|
||
/** | ||
* Creates a fetcher that hands the given import preferences to the parser it creates. | ||
* | ||
* @param preferences the import format preferences passed on to the parser; must not be null | ||
* @throws NullPointerException if {@code preferences} is null | ||
*/ | ||
public AstrophysicsDataSystem(ImportFormatPreferences preferences) { | ||
this.preferences = Objects.requireNonNull(preferences); | ||
} | ||
|
||
/** | ||
* Returns the name of this fetcher. | ||
* | ||
* @return the constant string "SAO/NASA Astrophysics Data System" | ||
*/ | ||
@Override | ||
public String getName() { | ||
return "SAO/NASA Astrophysics Data System"; | ||
} | ||
|
||
@Override | ||
public HelpFile getHelpPage() { | ||
return null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't we have a default implementation? Maybe, we want to force the developers to think about a help page? Nevertheless, I think a default implementation returning |
||
} | ||
|
||
/** | ||
* Builds the URL that is opened to run a search for the given query. | ||
* | ||
* The query is sent as the {@code qsearch} parameter; the response format is requested as | ||
* {@code BIBTEXPLUS}, starting at result 1 and returning at most 200 results. | ||
* | ||
* @param query the search query, passed to the service as entered | ||
* @return the complete search URL based on {@code API_URL} | ||
* @throws URISyntaxException if the base URL or the resulting URI is not syntactically valid | ||
* @throws MalformedURLException if the resulting URI cannot be converted to a URL | ||
* @throws FetcherException declared by the overridden method; not thrown by this implementation | ||
*/ | ||
@Override | ||
public URL getQueryURL(String query) throws URISyntaxException, MalformedURLException, FetcherException { | ||
// Each addParameter call returns the builder, so the request is assembled as one fluent chain | ||
return new URIBuilder(API_URL) | ||
.addParameter("qsearch", query) | ||
.addParameter("data_type", "BIBTEXPLUS") | ||
.addParameter("start_nr", Integer.toString(1)) | ||
.addParameter("nr_to_return", Integer.toString(200)) | ||
.build() | ||
.toURL(); | ||
} | ||
|
||
/** | ||
* Returns the parser used to turn the search response into entries. | ||
* | ||
* @return a new {@link BibtexParser} configured with this fetcher's import preferences | ||
*/ | ||
@Override | ||
public Parser getParser() { | ||
return new BibtexParser(preferences); | ||
} | ||
|
||
@Override | ||
public void doPostCleanup(BibEntry entry) { | ||
new FieldFormatterCleanup(FieldName.ABSTRACT, new RemoveBracesFormatter()).cleanup(entry); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment for future work: Can we rework the formatter code to follow some sort of builder pattern? I once stumbled upon EqualsBuilder and found it quite nice. |
||
new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry); | ||
new FieldFormatterCleanup(FieldName.AUTHOR, new NormalizeNamesFormatter()).cleanup(entry); | ||
new FieldFormatterCleanup("adsnote", new ClearFormatter()).cleanup(entry); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you provide example content in the comment and the reason why it is not included? Personally, I keep as much information as possible in the bib files, as the styles ignore it anyway. |
||
new FieldFormatterCleanup("adsurl", new ClearFormatter()).cleanup(entry); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* Copyright (C) 2003-2016 JabRef contributors. | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 2 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along | ||
* with this program; if not, write to the Free Software Foundation, Inc., | ||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
*/ | ||
|
||
package net.sf.jabref.logic.importer.fetcher; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import net.sf.jabref.logic.bibtex.FieldContentParserPreferences; | ||
import net.sf.jabref.logic.importer.ImportFormatPreferences; | ||
import net.sf.jabref.model.entry.BibEntry; | ||
import net.sf.jabref.model.entry.BibtexEntryTypes; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import static net.sf.jabref.logic.util.OS.NEWLINE; | ||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertTrue; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
public class AstrophysicsDataSystemTest { | ||
|
||
AstrophysicsDataSystem fetcher; | ||
|
||
/** | ||
* Creates the fetcher under test with mocked import preferences. | ||
* | ||
* The preferences mock is stubbed so that getFieldContentParserPreferences() returns a | ||
* mocked {@link FieldContentParserPreferences} instead of Mockito's default null. | ||
*/ | ||
@Before | ||
public void setUp() throws Exception { | ||
ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class); | ||
when(importFormatPreferences.getFieldContentParserPreferences()).thenReturn( | ||
mock(FieldContentParserPreferences.class)); | ||
fetcher = new AstrophysicsDataSystem(importFormatPreferences); | ||
} | ||
|
||
@Test | ||
public void performSearchFindsEntry() throws Exception { | ||
BibEntry diezSliceTheoremEntry = new BibEntry(); | ||
diezSliceTheoremEntry.setType(BibtexEntryTypes.ARTICLE); | ||
diezSliceTheoremEntry.setCiteKey("2014arXiv1405.2249D"); | ||
diezSliceTheoremEntry.setField("author", "Diez, T."); | ||
diezSliceTheoremEntry.setField("title", "Slice theorem for Fr$\\backslash$'echet group actions and covariant symplectic field theory"); | ||
diezSliceTheoremEntry.setField("year", "2014"); | ||
diezSliceTheoremEntry.setField("archiveprefix", "arXiv"); | ||
diezSliceTheoremEntry.setField("eprint", "1405.2249"); | ||
diezSliceTheoremEntry.setField("journal", "ArXiv e-prints"); | ||
diezSliceTheoremEntry.setField("keywords", "Mathematical Physics, Mathematics - Differential Geometry, Mathematics - Symplectic Geometry, 58B99, 58Z05, 58B25, 22E65, 58D19, 53D20, 53D42"); | ||
diezSliceTheoremEntry.setField("month", "#may#"); | ||
diezSliceTheoremEntry.setField("primaryclass", "math-ph"); | ||
diezSliceTheoremEntry.setField("abstract", | ||
"A general slice theorem for the action of a Fr$\\backslash$'echet Lie group on a" + NEWLINE | ||
+ "Fr$\\backslash$'echet manifolds is established. The Nash-Moser theorem provides the" | ||
+ NEWLINE + "fundamental tool to generalize the result of Palais to this" + NEWLINE | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you move the |
||
+ "infinite-dimensional setting. The presented slice theorem is illustrated" + NEWLINE | ||
+ "by its application to gauge theories: the action of the gauge" + NEWLINE | ||
+ "transformation group admits smooth slices at every point and thus the" + NEWLINE | ||
+ "gauge orbit space is stratified by Fr$\\backslash$'echet manifolds. Furthermore, a" + NEWLINE | ||
+ "covariant and symplectic formulation of classical field theory is" + NEWLINE | ||
+ "proposed and extensively discussed. At the root of this novel framework" + NEWLINE | ||
+ "is the incorporation of field degrees of freedom F and spacetime M into" + NEWLINE | ||
+ "the product manifold F * M. The induced bigrading of differential forms" + NEWLINE | ||
+ "is used in order to carry over the usual symplectic theory to this new" + NEWLINE | ||
+ "setting. The examples of the Klein-Gordon field and general Yang-Mills" + NEWLINE | ||
+ "theory illustrate that the presented approach conveniently handles the" + NEWLINE | ||
+ "occurring symmetries." + NEWLINE); | ||
|
||
List<BibEntry> fetchedEntries = fetcher.performSearch("Diez slice theorem"); | ||
assertEquals(Collections.singletonList(diezSliceTheoremEntry), fetchedEntries); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@zellerdev This is interesting for you as we discussed this about two weeks ago.