Feature/implement complex queries (#7350)

JabRef · Jan 26, 2021 · 034cf8c · 034cf8c
1 parent 7117b61
commit 034cf8c
Show file tree

Hide file tree

Showing 53 changed files with 1,617 additions and 371 deletions.
diff --git a/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java
@@ -3,34 +3,40 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Optional;
 
-import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.paging.Page;
 
+import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
+import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
+import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
+
 public interface PagedSearchBasedFetcher extends SearchBasedFetcher {
 
     /**
-     * @param complexSearchQuery the complex query defining all fielded search parameters
-     * @param pageNumber         requested site number indexed from 0
+     * @param luceneQuery the root node of the lucene query
+     * @param pageNumber  requested page number, indexed from 0
      * @return Page with search results
      */
-    Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException;
+    Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException;
 
     /**
-     * @param complexSearchQuery query string that can be parsed into a complex search query
-     * @param pageNumber         requested site number indexed from 0
+     * @param searchQuery query string that can be parsed into a lucene query
+     * @param pageNumber  requested page number, indexed from 0
      * @return Page with search results
      */
-    default Page<BibEntry> performSearchPaged(String complexSearchQuery, int pageNumber) throws FetcherException {
-        if (complexSearchQuery.isBlank()) {
-            return new Page<>(complexSearchQuery, pageNumber, Collections.emptyList());
+    default Page<BibEntry> performSearchPaged(String searchQuery, int pageNumber) throws FetcherException {
+        if (searchQuery.isBlank()) {
+            return new Page<>(searchQuery, pageNumber, Collections.emptyList());
+        }
+        SyntaxParser parser = new StandardSyntaxParser();
+        final String NO_EXPLICIT_FIELD = "default"; // field name passed to the parser for terms without an explicit field
+        try {
+            return this.performSearchPaged(parser.parse(searchQuery, NO_EXPLICIT_FIELD), pageNumber);
+        } catch (QueryNodeParseException e) {
+            throw new FetcherException("An error occurred during parsing of the query.", e);
         }
-        QueryParser queryParser = new QueryParser();
-        Optional<ComplexSearchQuery> generatedQuery = queryParser.parseQueryStringIntoComplexQuery(complexSearchQuery);
-        // Otherwise just use query as a default term
-        return this.performSearchPaged(generatedQuery.orElse(ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build()), pageNumber);
     }
 
     /**
@@ -40,13 +46,14 @@ default int getPageSize() {
         return 20;
     }
 
-    @Override
-    default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
-        return new ArrayList<>(performSearchPaged(complexSearchQuery, 0).getContent());
+    /**
+     * Sends a complex (fielded) query and returns the content of the first result page (page 0).
+     * @param luceneQuery the root node of the lucene query
+     * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
+     */
+    @Override
+    default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
+        return new ArrayList<>(performSearchPaged(luceneQuery, 0).getContent());
     }
 
-    @Override
-    default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
-        return new ArrayList<>(performSearchPaged(complexSearchQuery, 0).getContent());
-    }
 }
diff --git a/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java
@@ -7,22 +7,23 @@
 import java.net.URL;
 import java.util.List;
 
-import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.paging.Page;
 
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
+
 public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {
 
     @Override
-    default Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
+    default Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException {
         // ADR-0014
         URL urlForQuery;
         try {
-            urlForQuery = getComplexQueryURL(complexSearchQuery, pageNumber);
+            urlForQuery = getURLForQuery(luceneQuery, pageNumber);
         } catch (URISyntaxException | MalformedURLException e) {
             throw new FetcherException("Search URI crafted from complex search query is malformed", e);
         }
-        return new Page<>(complexSearchQuery.toString(), pageNumber, getBibEntries(urlForQuery));
+        return new Page<>(luceneQuery.toString(), pageNumber, getBibEntries(urlForQuery));
     }
 
     private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
@@ -39,34 +40,18 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
 
     /**
      * Constructs a URL based on the query, size and page number.
-     *
-     * @param query      the search query
+     * @param luceneQuery the root node of the lucene query
      * @param pageNumber the number of the page indexed from 0
      */
-    URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;
-
-    /**
-     * Constructs a URL based on the query, size and page number.
-     *
-     * @param complexSearchQuery the search query
-     * @param pageNumber         the number of the page indexed from 0
-     */
-    default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
-        return getURLForQuery(complexSearchQuery.toString(), pageNumber);
-    }
-
-    @Override
-    default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
-        return SearchBasedParserFetcher.super.performSearch(complexSearchQuery);
-    }
+    URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
 
     @Override
-    default URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
-        return getURLForQuery(query, 0);
+    default URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException {
+        return getURLForQuery(luceneQuery, 0);
     }
 
     @Override
-    default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
-        return getComplexQueryURL(query, 0);
+    default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
+        return SearchBasedParserFetcher.super.performSearch(luceneQuery);
     }
 }
diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java
@@ -2,11 +2,16 @@
 
 import java.util.Collections;
 import java.util.List;
-import java.util.Optional;
 
-import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
 import org.jabref.model.entry.BibEntry;
 
+import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
+import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
+import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
+
+import static org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer.NO_EXPLICIT_FIELD;
+
 /**
  * Searches web resources for bibliographic information based on a free-text query.
  * May return multiple search hits.
@@ -16,24 +21,27 @@ public interface SearchBasedFetcher extends WebFetcher {
     /**
      * This method is used to send complex queries using fielded search.
      *
-     * @param complexSearchQuery the complex search query defining all fielded search parameters
+     * @param luceneQuery the root node of the lucene query
      * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
      */
-    List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException;
+    List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException;
 
     /**
      * Looks for hits which are matched by the given free-text query.
      *
-     * @param complexSearchQuery query string that can be parsed into a complex search query
+     * @param searchQuery query string that can be parsed into a lucene query
      * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
      */
-    default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
-        if (complexSearchQuery.isBlank()) {
+    default List<BibEntry> performSearch(String searchQuery) throws FetcherException {
+        if (searchQuery.isBlank()) {
             return Collections.emptyList();
         }
-        QueryParser queryParser = new QueryParser();
-        Optional<ComplexSearchQuery> generatedQuery = queryParser.parseQueryStringIntoComplexQuery(complexSearchQuery);
-        // Otherwise just use query as a default term
-        return this.performSearch(generatedQuery.orElse(ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build()));
+        SyntaxParser parser = new StandardSyntaxParser();
+        // A query that is not valid lucene syntax surfaces as a FetcherException carrying the parser error as its cause
+        try {
+            return this.performSearch(parser.parse(searchQuery, NO_EXPLICIT_FIELD));
+        } catch (QueryNodeParseException e) {
+            throw new FetcherException("An error occurred when parsing the query", e);
+        }
     }
 }
diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java
@@ -8,9 +8,10 @@
 import java.util.List;
 
 import org.jabref.logic.cleanup.Formatter;
-import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
 import org.jabref.model.entry.BibEntry;
 
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
+
 /**
  * Provides a convenient interface for search-based fetcher, which follow the usual three-step procedure:
  * <ol>
@@ -26,14 +27,14 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher {
      * This method is necessary as the performSearch method does not support certain URL parameters that are used for
      * fielded search, such as a title, author, or year parameter.
      *
-     * @param complexSearchQuery the search query defining all fielded search parameters
+     * @param luceneQuery the root node of the lucene query
      */
     @Override
-    default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
+    default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
         // ADR-0014
         URL urlForQuery;
         try {
-            urlForQuery = getURLForQuery(complexSearchQuery);
+            urlForQuery = getURLForQuery(luceneQuery);
         } catch (URISyntaxException | MalformedURLException | FetcherException e) {
             throw new FetcherException("Search URI crafted from complex search query is malformed", e);
         }
@@ -52,22 +53,17 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
         }
     }
 
-    default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
-        // Default implementation behaves as getURLForQuery treating complex query as plain string query
-        return this.getURLForQuery(query.toString());
-    }
-
     /**
      * Returns the parser used to convert the response to a list of {@link BibEntry}.
      */
     Parser getParser();
 
     /**
-     * Constructs a URL based on the query.
+     * Constructs a URL based on the lucene query.
      *
-     * @param query the search query
+     * @param luceneQuery the root node of the lucene query
      */
-    URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException;
+    URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException;
 
     /**
      * Performs a cleanup of the fetched entry.

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java
@@ -11,10 +11,12 @@
 import org.jabref.logic.importer.ImportFormatPreferences;
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.SearchBasedParserFetcher;
+import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer;
 import org.jabref.logic.importer.fileformat.BibtexParser;
 import org.jabref.model.util.DummyFileUpdateMonitor;
 
 import org.apache.http.client.utils.URIBuilder;
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
 
 public class ACMPortalFetcher implements SearchBasedParserFetcher {
 
@@ -36,15 +38,16 @@ public Optional<HelpFile> getHelpPage() {
         return Optional.of(HelpFile.FETCHER_ACM);
     }
 
-    private static String createQueryString(String query) {
+    private static String createQueryString(QueryNode query) throws FetcherException {
+        String queryString = new DefaultQueryTransformer().transformLuceneQuery(query).orElse("");
         // Query syntax to search for an entry that matches "one" and "two" in any field is: (+one +two)
-        return "(%252B" + query.trim().replaceAll("\\s+", "%20%252B") + ")";
+        return "(%252B" + queryString.trim().replaceAll("\\s+", "%20%252B") + ")";
     }
 
     @Override
-    public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
+    public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException {
         URIBuilder uriBuilder = new URIBuilder(SEARCH_URL);
-        uriBuilder.addParameter("query", createQueryString(query)); // Search all fields
+        uriBuilder.addParameter("query", createQueryString(luceneQuery)); // Search all fields
         uriBuilder.addParameter("within", "owners.owner=GUIDE"); // Search within the ACM Guide to Computing Literature (encompasses the ACM Full-Text Collection)
         uriBuilder.addParameter("expformat", "bibtex"); // BibTeX format
         return uriBuilder.build().toURL();

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
@@ -23,6 +23,7 @@
 import org.jabref.logic.importer.IdFetcher;
 import org.jabref.logic.importer.ImportFormatPreferences;
 import org.jabref.logic.importer.PagedSearchBasedFetcher;
+import org.jabref.logic.importer.fetcher.transformators.ArXivQueryTransformer;
 import org.jabref.logic.util.io.XMLUtil;
 import org.jabref.logic.util.strings.StringSimilarity;
 import org.jabref.model.entry.BibEntry;
@@ -36,6 +37,7 @@
 import org.jabref.model.util.OptionalUtil;
 
 import org.apache.http.client.utils.URIBuilder;
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -252,25 +254,26 @@ public Optional<HelpFile> getHelpPage() {
     /**
      * Constructs a complex query string using the field prefixes specified at https://arxiv.org/help/api/user-manual
      *
-     * @param complexSearchQuery the search query defining all fielded search parameters
+     * @param luceneQuery the root node of the lucene query
      * @return A list of entries matching the complex query
      */
     @Override
-    public Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
-        List<String> searchTerms = new ArrayList<>();
-        complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("au:" + author));
-        complexSearchQuery.getTitlePhrases().forEach(title -> searchTerms.add("ti:" + title));
-        complexSearchQuery.getAbstractPhrases().forEach(abstr -> searchTerms.add("abs:" + abstr));
-        complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("jr:" + journal));
-        // Since ArXiv API does not support year search, we ignore the year related terms
-        complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString()));
-        searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases());
-        String complexQueryString = String.join(" AND ", searchTerms);
-
-        List<BibEntry> searchResult = searchForEntries(complexQueryString, pageNumber).stream()
-                                                                                      .map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
-                                                                                      .collect(Collectors.toList());
-        return new Page<>(complexQueryString, pageNumber, searchResult);
+    public Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException {
+        ArXivQueryTransformer transformer = new ArXivQueryTransformer();
+        String transformedQuery = transformer.transformLuceneQuery(luceneQuery).orElse("");
+        List<BibEntry> searchResult = searchForEntries(transformedQuery, pageNumber).stream()
+                                                                                    .map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
+                                                                                    .collect(Collectors.toList());
+        return new Page<>(transformedQuery, pageNumber, filterYears(searchResult, transformer));
+    }
+
+    private List<BibEntry> filterYears(List<BibEntry> searchResult, ArXivQueryTransformer transformer) {
+        return searchResult.stream()
+                           .filter(entry -> entry.getField(StandardField.DATE).isPresent())
+                           // Undated entries were dropped above; compare only the year, i.e. the first four characters of the date (assumes it starts with "yyyy")
+                           .filter(entry -> transformer.getEndYear().isEmpty() || Integer.parseInt(entry.getField(StandardField.DATE).get().substring(0, 4)) <= transformer.getEndYear().get())
+                           .filter(entry -> transformer.getStartYear().isEmpty() || Integer.parseInt(entry.getField(StandardField.DATE).get().substring(0, 4)) >= transformer.getStartYear().get())
+                           .collect(Collectors.toList());
+    }
 
     @Override