From 378a0ec2b6e6aae766b947230385e3479a31f21d Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Sun, 8 Jul 2018 17:48:26 +0200 Subject: [PATCH 1/2] Convert CiteSeerX fetcher to new infrastructure The old implementation first extracted the detail pages for each article matched by the query and then went to every detail page to extract the bibliographic information. Instead, we now parse the COinS information that is already contained in the main result page and thereby reduce the number of requests to 1 per query. --- .../importer/fetcher/CiteSeerXFetcher.java | 162 ------------------ .../gui/importer/fetcher/EntryFetchers.java | 1 - .../org/jabref/logic/importer/Parser.java | 5 + .../jabref/logic/importer/WebFetchers.java | 2 + .../logic/importer/fetcher/CiteSeer.java | 93 ++++++++++ .../importer/fileformat/BibtexParser.java | 5 - .../importer/fileformat/CoinsParser.java | 78 +++++++++ .../fetcher/ACMPortalFetcherTest.java | 2 + .../logic/importer/fetcher/CiteSeerTest.java | 36 ++++ .../logic/importer/fetcher/GvkParserTest.java | 3 +- .../importer/fetcher/INSPIREFetcherTest.java | 2 + .../importer/fetcher/SpringerFetcherTest.java | 2 + 12 files changed, 222 insertions(+), 169 deletions(-) delete mode 100644 src/main/java/org/jabref/gui/importer/fetcher/CiteSeerXFetcher.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java diff --git a/src/main/java/org/jabref/gui/importer/fetcher/CiteSeerXFetcher.java b/src/main/java/org/jabref/gui/importer/fetcher/CiteSeerXFetcher.java deleted file mode 100644 index 78ca7062d55..00000000000 --- a/src/main/java/org/jabref/gui/importer/fetcher/CiteSeerXFetcher.java +++ /dev/null @@ -1,162 +0,0 @@ -package org.jabref.gui.importer.fetcher; - -import java.io.IOException; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.swing.JPanel; - -import org.jabref.Globals; -import org.jabref.gui.importer.ImportInspectionDialog; -import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter; -import org.jabref.logic.help.HelpFile; -import org.jabref.logic.importer.ImportInspector; -import org.jabref.logic.importer.OutputPrinter; -import org.jabref.logic.net.URLDownload; -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.FieldName; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class CiteSeerXFetcher implements EntryFetcher { - - private static final int MAX_PAGES_TO_LOAD = 8; - private static final String QUERY_MARKER = "___QUERY___"; - private static final String URL_START = "http://citeseer.ist.psu.edu"; - private static final String SEARCH_URL = CiteSeerXFetcher.URL_START + "/search?q=" + CiteSeerXFetcher.QUERY_MARKER - + "&submit=Search&sort=rlv&t=doc"; - private static final Pattern CITE_LINK_PATTERN = Pattern.compile(""); - - private static final String BASE_PATTERN = ""; - private static final Pattern TITLE_PATTERN = Pattern - .compile(CiteSeerXFetcher.BASE_PATTERN.replace(CiteSeerXFetcher.QUERY_MARKER, "citation_title")); - private static final Pattern AUTHOR_PATTERN = Pattern - .compile(CiteSeerXFetcher.BASE_PATTERN.replace(CiteSeerXFetcher.QUERY_MARKER, "citation_authors")); - private static final Pattern YEAR_PATTERN = Pattern - .compile(CiteSeerXFetcher.BASE_PATTERN.replace(CiteSeerXFetcher.QUERY_MARKER, "citation_year")); - private static final Pattern ABSTRACT_PATTERN = Pattern.compile("

Abstract

\\s*

(.*)

"); - - private static final Logger LOGGER = LoggerFactory.getLogger(CiteSeerXFetcher.class); - - private boolean stopFetching; - - @Override - public boolean processQuery(String query, ImportInspector inspector, OutputPrinter status) { - stopFetching = false; - try { - List citations = getCitations(query); - for (String citation : citations) { - if (stopFetching) { - break; - } - BibEntry entry = getSingleCitation(citation); - if (entry != null) { - inspector.addEntry(entry); - } - } - - return true; - } catch (IOException e) { - LOGGER.error("Error while fetching from " + getTitle(), e); - ((ImportInspectionDialog)inspector).showErrorMessage(this.getTitle(), e.getLocalizedMessage()); - return false; - } - } - - @Override - public String getTitle() { - return "CiteSeerX"; - } - - @Override - public HelpFile getHelpPage() { - return HelpFile.FETCHER_CITESEERX; - } - - @Override - public JPanel getOptionsPanel() { - return null; - } - - @Override - public void stopFetching() { - stopFetching = true; - } - - /** - * - * @param query - * The search term to query JStor for. - * @return a list of IDs - * @throws java.io.IOException - */ - private List getCitations(String query) throws IOException { - String urlQuery; - List ids = new ArrayList<>(); - urlQuery = CiteSeerXFetcher.SEARCH_URL.replace(CiteSeerXFetcher.QUERY_MARKER, - URLEncoder.encode(query, StandardCharsets.UTF_8.name())); - int count = 1; - String nextPage; - while (((nextPage = getCitationsFromUrl(urlQuery, ids)) != null) - && (count < CiteSeerXFetcher.MAX_PAGES_TO_LOAD)) { - urlQuery = nextPage; - count++; - if (stopFetching) { - break; - } - } - return ids; - } - - private static String getCitationsFromUrl(String urlQuery, List ids) throws IOException { - String cont = new URLDownload(urlQuery).asString(Globals.prefs.getDefaultEncoding()); - Matcher m = CiteSeerXFetcher.CITE_LINK_PATTERN.matcher(cont); - while (m.find()) { - ids.add(CiteSeerXFetcher.URL_START + m.group(1)); - } - - return null; - } - - private static BibEntry getSingleCitation(String urlString) throws IOException { - String cont = new URLDownload(urlString).asString(); - - // Find title, and create entry if we do. Otherwise assume we did not get an entry: - Matcher m = CiteSeerXFetcher.TITLE_PATTERN.matcher(cont); - if (m.find()) { - BibEntry entry = new BibEntry(); - entry.setField(FieldName.TITLE, m.group(1)); - - // Find authors: - m = CiteSeerXFetcher.AUTHOR_PATTERN.matcher(cont); - if (m.find()) { - String authors = m.group(1); - entry.setField(FieldName.AUTHOR, new NormalizeNamesFormatter().format(authors)); - } - - // Find year: - m = CiteSeerXFetcher.YEAR_PATTERN.matcher(cont); - if (m.find()) { - entry.setField(FieldName.YEAR, m.group(1)); - } - - // Find abstract: - m = CiteSeerXFetcher.ABSTRACT_PATTERN.matcher(cont); - if (m.find()) { - entry.setField(FieldName.ABSTRACT, m.group(1)); - } - - return entry; - } else { - return null; - } - - } - -} diff --git a/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java b/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java index 0b48c288b6b..75f5e082131 100644 --- a/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java +++ b/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java @@ -13,7 +13,6 @@ public class EntryFetchers { private final List entryFetchers = new LinkedList<>(); public EntryFetchers(JournalAbbreviationLoader abbreviationLoader) { - entryFetchers.add(new CiteSeerXFetcher()); entryFetchers.add(new IEEEXploreFetcher(abbreviationLoader)); WebFetchers.getSearchBasedFetchers(Globals.prefs.getImportFormatPreferences()).stream() diff --git a/src/main/java/org/jabref/logic/importer/Parser.java b/src/main/java/org/jabref/logic/importer/Parser.java index 3279906d534..4b9c77c3783 100644 --- a/src/main/java/org/jabref/logic/importer/Parser.java +++ b/src/main/java/org/jabref/logic/importer/Parser.java @@ -1,5 +1,6 @@ package org.jabref.logic.importer; +import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.List; @@ -11,4 +12,8 @@ public interface Parser { List parseEntries(InputStream inputStream) throws ParseException; + + default List parseEntries(String dataString) throws ParseException { + return parseEntries(new ByteArrayInputStream(dataString.getBytes())); + } } diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java index e882279111b..74fc2076b58 100644 --- a/src/main/java/org/jabref/logic/importer/WebFetchers.java +++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java @@ -9,6 +9,7 @@ import org.jabref.logic.importer.fetcher.ACS; import org.jabref.logic.importer.fetcher.ArXiv; import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem; +import org.jabref.logic.importer.fetcher.CiteSeer; import org.jabref.logic.importer.fetcher.CrossRef; import org.jabref.logic.importer.fetcher.DBLPFetcher; import org.jabref.logic.importer.fetcher.DOAJFetcher; @@ -89,6 +90,7 @@ public static List getSearchBasedFetchers(ImportFormatPrefer list.add(new DBLPFetcher(importFormatPreferences)); list.add(new SpringerFetcher()); list.add(new CrossRef()); + list.add(new CiteSeer()); list.add(new DOAJFetcher(importFormatPreferences)); list.sort(Comparator.comparing(WebFetcher::getName)); return list; diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java new file mode 100644 index 00000000000..65f1918c454 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java @@ -0,0 +1,93 @@ +package org.jabref.logic.importer.fetcher; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.jabref.logic.formatter.bibtexfields.HtmlToUnicodeFormatter; +import org.jabref.logic.formatter.casechanger.TitleCaseFormatter; +import org.jabref.logic.help.HelpFile; +import org.jabref.logic.importer.FetcherException; +import org.jabref.logic.importer.Parser; +import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fileformat.CoinsParser; +import org.jabref.logic.util.OS; +import org.jabref.model.cleanup.FieldFormatterCleanup; +import org.jabref.model.cleanup.Formatter; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.FieldName; + +import org.apache.http.client.utils.URIBuilder; + +public class CiteSeer implements SearchBasedParserFetcher { + + public CiteSeer() { + } + + @Override + public String getName() { + return "CiteSeerX"; + } + + @Override + public HelpFile getHelpPage() { + return HelpFile.FETCHER_CITESEERX; + } + + @Override + public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + URIBuilder uriBuilder = new URIBuilder("https://citeseer.ist.psu.edu/search"); + uriBuilder.addParameter("sort", "rlv"); // Sort by relevance + uriBuilder.addParameter("q", query); // Query + uriBuilder.addParameter("t", "doc"); // Type: documents + //uriBuilder.addParameter("start", "0"); // Start index (not supported at the moment) + return uriBuilder.build().toURL(); + } + + @Override + public Parser getParser() { + // MathSciNet returns COinS result embedded in HTML + // So we extract the data string from the tags and pass the content to the COinS parser + return inputStream -> { + String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE)); + + List entries = new ArrayList<>(); + CoinsParser parser = new CoinsParser(); + Pattern pattern = Pattern.compile(""); + Matcher matcher = pattern.matcher(response); + while (matcher.find()) { + String encodedDataString = matcher.group(1); + entries.addAll(parser.parseEntries(encodedDataString)); + } + return entries; + }; + } + + @Override + public void doPostCleanup(BibEntry entry) { + // CiteSeer escapes some characters in a way that is not recognized by the normal html to unicode formatter + // We, of course, also want to convert these special characters + Formatter extendedHtmlFormatter = new HtmlToUnicodeFormatter() { + @Override + public String format(String fieldText) { + String formatted = super.format(fieldText); + formatted = formatted.replaceAll("%3Cem%3", ""); + formatted = formatted.replaceAll("%3C%2Fem%3E", ""); + formatted = formatted.replaceAll("%2C\\+", " "); + formatted = formatted.replaceAll("\\+", " "); + return formatted; + } + }; + new FieldFormatterCleanup(FieldName.INTERNAL_ALL_FIELD, extendedHtmlFormatter).cleanup(entry); + + // Many titles in the CiteSeer database have all-capital titles, for convenience we convert them to title case + new FieldFormatterCleanup(FieldName.TITLE, new TitleCaseFormatter()).cleanup(entry); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java b/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java index d9e3f5d4e83..16093dca0b3 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java @@ -6,7 +6,6 @@ import java.io.InputStreamReader; import java.io.PushbackReader; import java.io.Reader; -import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Deque; @@ -110,10 +109,6 @@ public List parseEntries(Reader reader) throws ParseException { } } - public List parseEntries(String bibtexString) throws ParseException { - return parseEntries(new StringReader(bibtexString)); - } - public Optional parseSingleEntry(String bibtexString) throws ParseException { return parseEntries(bibtexString).stream().findFirst(); } diff --git a/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java b/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java new file mode 100644 index 00000000000..430c08dbf30 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java @@ -0,0 +1,78 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.jabref.logic.importer.ParseException; +import org.jabref.logic.importer.Parser; +import org.jabref.logic.util.OS; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BiblatexEntryTypes; +import org.jabref.model.entry.FieldName; + +/** + * @implNote implemented by reverse-engineering
the implementation by CiteSeerX + */ +public class CoinsParser implements Parser { + + private final Pattern DOI = Pattern.compile("%3Fdoi%3D([^&]+)"); + private final Pattern TITLE = Pattern.compile("&rft.atitle=([^&]+)"); + private final Pattern JOURNAL = Pattern.compile("&rft.jtitle=([^&]+)"); + private final Pattern YEAR = Pattern.compile("&rft.jtitle=([^&]+)"); + private final Pattern VOLUME = Pattern.compile("&rft.jtitle=([^&]+)"); + private final Pattern PAGES = Pattern.compile("&rft.pages=([^&]+)"); + private final Pattern ISSUE = Pattern.compile("&rft.issue=([^&]+)"); + private final Pattern TYPE = Pattern.compile("&rft.genre=([^&]+)"); + private final Pattern AUTHOR = Pattern.compile("&rft.au=([^&]+)"); + + @Override + public List parseEntries(InputStream inputStream) throws ParseException { + String data = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE)); + BibEntry entry = new BibEntry(); + + appendData(data, entry, DOI, FieldName.DOI); + appendData(data, entry, TITLE, FieldName.TITLE); + appendData(data, entry, JOURNAL, FieldName.JOURNALTITLE); + appendData(data, entry, YEAR, FieldName.YEAR); + appendData(data, entry, VOLUME, FieldName.VOLUME); + appendData(data, entry, PAGES, FieldName.PAGES); + appendData(data, entry, ISSUE, FieldName.ISSUE); + + Matcher matcherType = TYPE.matcher(data); + if (matcherType.find()) { + switch (matcherType.group(1)) { + case "article": + entry.setType(BiblatexEntryTypes.ARTICLE); + break; + case "unknown": + default: + entry.setType(BiblatexEntryTypes.MISC); + break; + } + } + + List authors = new ArrayList<>(); + Matcher matcherAuthors = AUTHOR.matcher(data); + while (matcherAuthors.find()) { + String author = matcherAuthors.group(1); + authors.add(author); + } + entry.setField(FieldName.AUTHOR, authors.stream().collect(Collectors.joining(" and "))); + + return Collections.singletonList(entry); + } + + private void appendData(String data, BibEntry entry, Pattern pattern, String fieldName) { + Matcher matcher = pattern.matcher(data); + if (matcher.find()) { + entry.setField(fieldName, matcher.group(1)); + } + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ACMPortalFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ACMPortalFetcherTest.java index 4bd9848ab4d..df85c30857f 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/ACMPortalFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/ACMPortalFetcherTest.java @@ -7,6 +7,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -15,6 +16,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +@FetcherTest class ACMPortalFetcherTest { ACMPortalFetcher fetcher; diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java new file mode 100644 index 00000000000..2def7bf491e --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java @@ -0,0 +1,36 @@ +package org.jabref.logic.importer.fetcher; + +import java.util.Collections; +import java.util.List; + +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.testutils.category.FetcherTest; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@FetcherTest +class CiteSeerTest { + + CiteSeer fetcher; + + @BeforeEach + void setUp() throws Exception { + fetcher = new CiteSeer(); + } + + @Test + void searchByQueryFindsEntry() throws Exception { + BibEntry expected = new BibEntry(); + expected.setType(BibtexEntryTypes.MISC); + expected.setField("author", "Wang Wei and Zhang Pingwen and Zhang Zhifei"); + expected.setField("title", "Rigorous Derivation from Landau-de Gennes Theory to Eericksen-leslie Theory"); + expected.setField("doi", "10.1.1.744.5780"); + + List fetchedEntries = fetcher.performSearch("title:Ericksen-Leslie AND venue:q AND ncites:[10 TO 15000]"); + assertEquals(Collections.singletonList(expected), fetchedEntries); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java index 443b46be140..5e650d3378b 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java @@ -9,13 +9,14 @@ import org.jabref.logic.bibtex.BibEntryAssert; import org.jabref.logic.importer.fileformat.GvkParser; import org.jabref.model.entry.BibEntry; +import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; - +@FetcherTest public class GvkParserTest { private void doTest(String xmlName, int expectedSize, List resourceNames) throws Exception { diff --git a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java index 5f436313d5a..d919f96ce1d 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java @@ -7,6 +7,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -15,6 +16,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +@FetcherTest class INSPIREFetcherTest { private INSPIREFetcher fetcher; diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java index fe985e23761..56020ce6de3 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java @@ -5,12 +5,14 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; +@FetcherTest class SpringerFetcherTest { SpringerFetcher fetcher; From e57296c31f33d18a0eb30c37c7051843c7c76b72 Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Thu, 12 Jul 2018 23:10:09 +0200 Subject: [PATCH 2/2] Add additional test --- .../jabref/gui/importer/fetcher/EntryFetchers.java | 6 ------ .../jabref/logic/importer/fetcher/CiteSeer.java | 1 + .../logic/importer/fileformat/CoinsParser.java | 4 ++-- .../logic/importer/fetcher/CiteSeerTest.java | 14 ++++++++++++++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java b/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java index 3b60a567ec6..353eddde4ae 100644 --- a/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java +++ b/src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java @@ -13,12 +13,6 @@ public class EntryFetchers { private final List entryFetchers = new LinkedList<>(); public EntryFetchers(JournalAbbreviationLoader abbreviationLoader) { - - - - - - WebFetchers.getSearchBasedFetchers(Globals.prefs.getImportFormatPreferences()).stream() .map(SearchBasedEntryFetcher::new) .forEach(entryFetchers::add); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java index 65f1918c454..8d244b101f8 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java @@ -78,6 +78,7 @@ public void doPostCleanup(BibEntry entry) { @Override public String format(String fieldText) { String formatted = super.format(fieldText); + formatted = formatted.replaceAll("%3A", ":"); formatted = formatted.replaceAll("%3Cem%3", ""); formatted = formatted.replaceAll("%3C%2Fem%3E", ""); formatted = formatted.replaceAll("%2C\\+", " "); diff --git a/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java b/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java index 430c08dbf30..156dc551b5e 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/CoinsParser.java @@ -25,8 +25,8 @@ public class CoinsParser implements Parser { private final Pattern DOI = Pattern.compile("%3Fdoi%3D([^&]+)"); private final Pattern TITLE = Pattern.compile("&rft.atitle=([^&]+)"); private final Pattern JOURNAL = Pattern.compile("&rft.jtitle=([^&]+)"); - private final Pattern YEAR = Pattern.compile("&rft.jtitle=([^&]+)"); - private final Pattern VOLUME = Pattern.compile("&rft.jtitle=([^&]+)"); + private final Pattern YEAR = Pattern.compile("&rft.date=([^&]+)"); + private final Pattern VOLUME = Pattern.compile("&rft.volume=([^&]+)"); private final Pattern PAGES = Pattern.compile("&rft.pages=([^&]+)"); private final Pattern ISSUE = Pattern.compile("&rft.issue=([^&]+)"); private final Pattern TYPE = Pattern.compile("&rft.genre=([^&]+)"); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java index 2def7bf491e..5460e4d3abb 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java @@ -33,4 +33,18 @@ void searchByQueryFindsEntry() throws Exception { List fetchedEntries = fetcher.performSearch("title:Ericksen-Leslie AND venue:q AND ncites:[10 TO 15000]"); assertEquals(Collections.singletonList(expected), fetchedEntries); } + + @Test + void searchByQueryFindsEntry2() throws Exception { + BibEntry expected = new BibEntry(); + expected.setType(BibtexEntryTypes.MISC); + expected.setField("author", "Lazarus Richard S."); + expected.setField("title", "Coping Theory and Research: Past Present and Future"); + expected.setField("doi", "10.1.1.115.9665"); + expected.setField("year", "1993"); + expected.setField("journaltitle", "PSYCHOSOMATIC MEDICINE"); + + List fetchedEntries = fetcher.performSearch("JabRef"); + assertEquals(expected, fetchedEntries.get(4)); + } }