Fix more fetchers #6790

Merged
merged 22 commits into from Aug 26, 2020
Changes from 11 commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -106,6 +106,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where percent sign ('%') was not formatted properly by the HTML formatter [#6753](https://github.com/JabRef/jabref/issues/6753)
- We fixed an issue with the [SAO/NASA Astrophysics Data System](https://docs.jabref.org/collect/import-using-online-bibliographic-database/ads) fetcher where `\textbackslash` appeared at the end of the abstract.
- We fixed an issue with the Science Direct fetcher where PDFs could not be downloaded. Fixes [#5860](https://github.com/JabRef/jabref/issues/5860)
- We fixed an issue with the Library of Congress importer.

### Removed

@@ -13,6 +13,8 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

import org.slf4j.LoggerFactory;

/**
 * Provides a convenient interface for search-based fetchers, which follow the usual three-step procedure:
* 1. Open a URL based on the search query
@@ -39,16 +41,22 @@ default List<BibEntry> performSearch(String query) throws FetcherException {
return Collections.emptyList();
}

try (InputStream stream = getUrlDownload(getURLForQuery(query)).asInputStream()) {
URL urlForQuery;
try {
urlForQuery = getURLForQuery(query);
} catch (URISyntaxException | MalformedURLException e) {
LoggerFactory.getLogger(this.getClass()).info("Search URL {} is malformed", query);
throw new FetcherException("Search URI is malformed", e);
}
try (InputStream stream = getUrlDownload(urlForQuery).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

// Post-cleanup
fetchedEntries.forEach(this::doPostCleanup);

return fetchedEntries;
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
LoggerFactory.getLogger(this.getClass()).info("IOException at URL {}", urlForQuery.toString());
// TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
@@ -65,12 +73,17 @@ default List<BibEntry> performSearch(String query) throws FetcherException {
*/
@Override
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
try (InputStream stream = getUrlDownload(getComplexQueryURL(complexSearchQuery)).asInputStream()) {
URL complexQueryURL = null;
try {
complexQueryURL = getComplexQueryURL(complexSearchQuery);
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException("Search URI is malformed", e);
}
LoggerFactory.getLogger(this.getClass()).debug("Using query URL {}", complexQueryURL.toString());
try (InputStream stream = getUrlDownload(complexQueryURL).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
fetchedEntries.forEach(this::doPostCleanup);
return fetchedEntries;
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
// TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource
throw new FetcherException("A network error occurred", e);
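The reworked flow above resolves the query URL up front and logs before wrapping exceptions. For context, here is a minimal sketch of a fetcher built on this interface — illustrative only and not part of this PR; the class name, example URL, and empty parser are assumptions, while the method signatures follow the ones visible in this diff:

// Hypothetical sketch, not part of this PR: a minimal SearchBasedParserFetcher implementation.
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;

import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;

import org.apache.http.client.utils.URIBuilder;

public class ExampleFetcher implements SearchBasedParserFetcher {

    @Override
    public String getName() {
        return "Example";
    }

    @Override
    public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException {
        // Step 1: build the search URL; with this change, a malformed query is logged and
        // wrapped into a FetcherException by the default performSearch before any download starts
        return new URIBuilder("https://example.org/api/search")
                .addParameter("q", query)
                .build()
                .toURL();
    }

    @Override
    public Parser getParser() {
        // Step 2: turn the response stream into entries (assuming Parser is a functional interface;
        // a real fetcher plugs in a format-specific parser here)
        return inputStream -> Collections.emptyList();
    }

    // Step 3: doPostCleanup keeps the default implementation (no cleanup)
}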
@@ -3,6 +3,7 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Arrays;

import org.jabref.logic.formatter.bibtexfields.RemoveDigitsFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
@@ -14,7 +15,9 @@
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.FieldFactory;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;

import org.apache.http.client.utils.URIBuilder;

@@ -31,10 +34,10 @@ public CollectionOfComputerScienceBibliographiesFetcher(ImportFormatPreferences
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
return new URIBuilder(BASIC_SEARCH_URL)
.addParameter("query", query)
.addParameter("sort", "score")
.build()
.toURL();
.addParameter("query", query)
.addParameter("sort", "score")
.build()
.toURL();
}

@Override
@@ -53,5 +56,29 @@ public void doPostCleanup(BibEntry entry) {
new FieldFormatterCleanup(StandardField.ABSTRACT, new ReplaceTabsBySpaceFormater()).cleanup(entry);
new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveRedundantSpacesFormatter()).cleanup(entry);
new FieldFormatterCleanup(StandardField.EDITOR, new RemoveDigitsFormatter()).cleanup(entry);
// the identifier field is a key-value field
// example: "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000505046100032; Scopus 2-s2.0-85037741580"
// thus, the key can contain multiple ":"; sometimes the value is separated by " " instead of ":"
UnknownField identifierField = new UnknownField("identifier");
entry.getField(identifierField)
.stream()
.flatMap(value -> Arrays.stream(value.split("; ")))
.forEach(identifierKeyValue -> {
// check for pattern "Scopus 2-..."
String[] identifierKeyValueSplit = identifierKeyValue.split(" ");
if (identifierKeyValueSplit.length == 1) {
// check for pattern "doi:..."
identifierKeyValueSplit = identifierKeyValue.split(":");
}
int length = identifierKeyValueSplit.length;
if (length < 2) {
return;
}
// in the case "urn:isbn:", just "isbn" is used
String key = identifierKeyValueSplit[length - 2];
String value = identifierKeyValueSplit[length - 1];
entry.setField(FieldFactory.parseField(key), value);
});
entry.clearField(identifierField);
}
}
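To make the new identifier handling easier to follow, here is a standalone illustration (not part of the PR, written to mirror the loop above) of how the example value from the comment gets split — only the last two tokens of each "; "-separated segment are kept, so "urn:isbn:..." collapses to an isbn key:

// Illustration only: mirrors the splitting logic added to doPostCleanup above.
// Can be dropped into any main method; prints one key/value pair per identifier.
String identifier = "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000505046100032; Scopus 2-s2.0-85037741580";
for (String keyValue : identifier.split("; ")) {
    String[] parts = keyValue.split(" ");      // matches the "Scopus 2-s2.0-..." pattern
    if (parts.length == 1) {
        parts = keyValue.split(":");           // matches "doi:..." and "urn:isbn:..."
    }
    if (parts.length < 2) {
        continue;
    }
    String key = parts[parts.length - 2];      // "urn:isbn:..." yields "isbn"
    String value = parts[parts.length - 1];
    System.out.println(key + " = " + value);   // isbn = 978-1-4503-5217-8, doi = 10.1145/3129790.3129810, ...
}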
73 changes: 39 additions & 34 deletions src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
@@ -43,8 +43,7 @@ public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher {

private static final Pattern LINK_TO_BIB_PATTERN = Pattern.compile("(https:\\/\\/scholar.googleusercontent.com\\/scholar.bib[^\"]*)");

private static final String BASIC_SEARCH_URL = "https://scholar.google.com/scholar?";
private static final String SEARCH_IN_TITLE_URL = "https://scholar.google.com// scholar?";
private static final String BASIC_SEARCH_URL = "https://scholar.google.ch/scholar?";

private static final int NUM_RESULTS = 10;

@@ -66,10 +65,10 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherExc

try {
// title search
URIBuilder uriBuilder = new URIBuilder(SEARCH_IN_TITLE_URL);
URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL);
uriBuilder.addParameter("as_q", "");
// as_epq as exact phrase
uriBuilder.addParameter("as_epq", entry.getField(StandardField.TITLE).orElse(null));
uriBuilder.addParameter("as_epq", entry.getField(StandardField.TITLE).orElse(""));
// as_occt field to search in
uriBuilder.addParameter("as_occt", "title");

@@ -131,33 +130,36 @@ public Optional<HelpFile> getHelpPage() {
public List<BibEntry> performSearch(String query) throws FetcherException {
try {
obtainAndModifyCookie();
List<BibEntry> foundEntries = new ArrayList<>(10);
List<BibEntry> foundEntries = new ArrayList<>(20);

URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL);
uriBuilder.addParameter("hl", "en");
uriBuilder.addParameter("btnG", "Search");
uriBuilder.addParameter("q", query);

addHitsFromQuery(foundEntries, uriBuilder.toString());

if (foundEntries.size() == 10) {
uriBuilder.addParameter("start", "10");
try {
addHitsFromQuery(foundEntries, uriBuilder.toString());

if (foundEntries.size() == 10) {
uriBuilder.addParameter("start", "10");
addHitsFromQuery(foundEntries, uriBuilder.toString());
}
} catch (IOException e) {
LOGGER.info("IOException for URL {}", uriBuilder.toString());
// if there are too many requests from the same IP address, Google answers with a 503 and redirects to a captcha challenge
// The caught IOException looks for example like this:
// java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
throw new FetcherException("Fetching from Google Scholar failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
} else {
throw new FetcherException("Error while fetching from " + getName(), e);
}
}

return foundEntries;
} catch (URISyntaxException e) {
throw new FetcherException("Error while fetching from " + getName(), e);
} catch (IOException e) {
// if there are too many requests from the same IP address, Google answers with a 503 and redirects to a captcha challenge
// The caught IOException looks for example like this:
// java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
throw new FetcherException("Fetching from Google Scholar failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
} else {
throw new FetcherException("Error while fetching from " + getName(), e);
}
}
}

@@ -178,26 +180,28 @@ public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery
uriBuilder.addParameter("as_yhi", year.toString());
});

addHitsFromQuery(foundEntries, uriBuilder.toString());

if (foundEntries.size() == 10) {
uriBuilder.addParameter("start", "10");
try {
addHitsFromQuery(foundEntries, uriBuilder.toString());
}

if (foundEntries.size() == 10) {
uriBuilder.addParameter("start", "10");
addHitsFromQuery(foundEntries, uriBuilder.toString());
}
} catch (IOException e) {
LOGGER.info("IOException for URL {}", uriBuilder.toString());
// if there are too many requests from the same IP address, Google answers with a 503 and redirects to a captcha challenge
// The caught IOException looks for example like this:
// java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
throw new FetcherException("Fetching from Google Scholar failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
} else {
throw new FetcherException("Error while fetching from " + getName(), e);
}
}
return foundEntries;
} catch (URISyntaxException e) {
throw new FetcherException("Error while fetching from " + getName(), e);
} catch (IOException e) {
// if there are too many requests from the same IP address, Google answers with a 503 and redirects to a captcha challenge
// The caught IOException looks for example like this:
// java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
throw new FetcherException("Fetching from Google Scholar failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
} else {
throw new FetcherException("Error while fetching from " + getName(), e);
}
}
}

@@ -215,6 +219,7 @@ private void addHitsFromQuery(List<BibEntry> entryList, String queryURL) throws
String content = new URLDownload(queryURL).asString();

if (needsCaptcha(content)) {
LOGGER.info("Captcha hit at {}", queryURL);
throw new FetcherException("Fetching from Google Scholar failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), null);
}
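The pagination above only requests a second result page when the first page came back with exactly ten hits. A rough sketch of the two URLs being composed (parameter names taken from this diff, the concrete query value is made up):

// Illustration only; lives inside a method that declares URISyntaxException, as performSearch does.
URIBuilder uriBuilder = new URIBuilder("https://scholar.google.ch/scholar?");
uriBuilder.addParameter("hl", "en");
uriBuilder.addParameter("btnG", "Search");
uriBuilder.addParameter("q", "bpmn");
String firstPage = uriBuilder.toString();    // hits 1-10
uriBuilder.addParameter("start", "10");
String secondPage = uriBuilder.toString();   // hits 11-20, only requested if the first page was full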
@@ -174,7 +174,7 @@ private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery
complexSearchQuery.getAuthors().ifPresent(authors -> authors.forEach(author -> searchTerms.add("name:" + author)));
complexSearchQuery.getTitlePhrases().ifPresent(titlePhrases -> titlePhrases.forEach(title -> searchTerms.add("title:" + title)));
complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("journal:" + journal));
// Since Springer API does not support year range search we ignore formYear and toYear.
// Since the Springer API does not support year range search, we ignore fromYear and toYear and use "singleYear" only
complexSearchQuery.getSingleYear().ifPresent(year -> searchTerms.add("year:" + year.toString()));
complexSearchQuery.getDefaultField().ifPresent(defaultField -> searchTerms.add(defaultField));
return String.join(" AND ", searchTerms);
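As a worked example of the string this method builds (the values are made up): each term is prefixed with its field and the terms are joined with " AND ", so one author, one title phrase, and a single year end up as one query string.

// Illustration only, not part of the PR.
List<String> searchTerms = List.of("name:Smith", "title:process mining", "year:2019");
String query = String.join(" AND ", searchTerms);   // "name:Smith AND title:process mining AND year:2019"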
@@ -6,6 +6,7 @@
import java.util.Optional;

import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.util.BuildInfo;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
@@ -27,7 +28,7 @@ public class SpringerLink implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(SpringerLink.class);

private static final String API_URL = "https://api.springer.com/meta/v1/json";
private static final String API_KEY = "a98b4a55181ffcd27259bea45edad12e";
private static final String API_KEY = new BuildInfo().springerNatureAPIKey;
private static final String CONTENT_HOST = "link.springer.com";

@Override
@@ -398,7 +398,7 @@ private void putDate(Map<Field, String> fields, String elementName, DateDefiniti

case "dateIssued":
// The first 4 digits of dateIssued should be the year
fields.put(StandardField.YEAR, date.getValue().substring(0, 4));
fields.put(StandardField.YEAR, date.getValue().replaceAll("[^0-9]*", "").replaceAll("\\(\\d?\\d?\\d?\\d?.*\\)", "\1"));
break;
case "dateCreated":
// If there was no year in date issued, then take the year from date created
@@ -435,7 +435,9 @@ private void handleAuthorsInNamePart(NameDefinition name, List<String> authors,
NamePartDefinition namePart = (NamePartDefinition) value;
String type = namePart.getAtType();
if ((type == null) && (namePart.getValue() != null)) {
authors.add(namePart.getValue());
String namePartValue = namePart.getValue();
namePartValue = namePartValue.replaceAll(",$", "");
authors.add(namePartValue);
} else if ("family".equals(type) && (namePart.getValue() != null)) {
// family should come first, so if a family name appears we can set the author that came before it
// we have to check if forename and family name are not empty in case it's the first author
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/net/URLDownload.java
@@ -60,7 +60,7 @@
*/
public class URLDownload {

public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0";
public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0";

private static final Logger LOGGER = LoggerFactory.getLogger(URLDownload.class);
private final URL source;
16 changes: 6 additions & 10 deletions src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java
@@ -8,23 +8,19 @@
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

@FetcherTest
class CiteSeerTest {

CiteSeer fetcher;

@BeforeEach
void setUp() throws Exception {
fetcher = new CiteSeer();
}
private CiteSeer fetcher = new CiteSeer();

@Test
void searchByQueryFindsEntry() throws Exception {
@Disabled("CiteseerX currently has issues with ncites query")
void searchByQueryFindsEntryRigorousDerivation() throws Exception {
BibEntry expected = new BibEntry(StandardEntryType.Misc)
.withField(StandardField.AUTHOR, "Wang Wei and Zhang Pingwen and Zhang Zhifei")
.withField(StandardField.TITLE, "Rigorous Derivation from Landau-de Gennes Theory to Eericksen-leslie Theory")
@@ -35,13 +31,13 @@ void searchByQueryFindsEntry() throws Exception {
}

@Test
void searchByQueryFindsEntry2() throws Exception {
void searchByQueryFindsEntryCopingTheoryAndResearch() throws Exception {
BibEntry expected = new BibEntry(StandardEntryType.Misc)
.withField(StandardField.AUTHOR, "Lazarus Richard S.")
.withField(StandardField.TITLE, "Coping Theory and Research: Past Present and Future")
.withField(StandardField.DOI, "10.1.1.115.9665")
.withField(StandardField.YEAR, "1993")
.withField(StandardField.JOURNAL, "PSYCHOSOMATIC MEDICINE");
.withField(StandardField.JOURNALTITLE, "PSYCHOSOMATIC MEDICINE");

List<BibEntry> fetchedEntries = fetcher.performSearch("doi:10.1.1.115.9665");
assertEquals(Collections.singletonList(expected), fetchedEntries);