Skip to content

Commit

Permalink
Merge pull request #7073 from k3KAW8Pnf7mkmdSMPHz27/fix-for-issue-6777
Browse files Browse the repository at this point in the history
Fixes exception in preview using regexp search and regexp search without specified field
  • Loading branch information
Siedlerchr committed Nov 21, 2020
2 parents b5ead5e + 9abbed2 commit 6f35c36
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 66 deletions.
43 changes: 27 additions & 16 deletions src/main/java/org/jabref/gui/preview/PreviewViewer.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ public class PreviewViewer extends ScrollPane implements InvalidationListener {
"function(){function n(e){t(this,n),this.ctx=e,this.ie=!1;var r=window.navigator.userAgent;(r.indexOf(\"MSIE\")>-1||r.indexOf(\"Trident\")>-1)&&(this.ie=!0)}return r(n,[{key:\"log\",value:function(t){var n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:\"debug\",r=this.opt.log;this.opt.debug&&\"object\"===e(r)&&\"function\"==typeof r[n]&&r[n](\"mark.js: \".concat(t))}},{key:\"getSeparatedKeywords\",value:function(e){var t=this,n=[];return e.forEach(function(e){t.opt.separateWordSearch?e.split(\" \").forEach(function(e){e.trim()&&-1===n.indexOf(e)&&n.push(e)}):e.trim()&&-1===n.indexOf(e)&&n.push(e)}),{keywords:n.sort(function(e,t){return t.length-e.length}),length:n.length}}},{key:\"isNumeric\",value:function(e){return Number(parseFloat(e))==e}},{key:\"checkRanges\",value:function(e){var t=this;if(!Array.isArray(e)||\"[object Object]\"!==Object.prototype.toString.call(e[0]))return this.log(\"markRanges() will only accept an array of objects\"),this.opt.noMatch(e),[];var n=[],r=0;return e.sort(function(e,t){return e.start-t.start}).forEach(function(e){var o=t.callNoMatchOnInvalidRanges(e,r),i=o.start,a=o.end;o.valid&&(e.start=i,e.length=a-i,n.push(e),r=a)}),n}},{key:\"callNoMatchOnInvalidRanges\",value:function(e,t){var n,r,o=!1;return e&&void 0!==e.start?(r=(n=parseInt(e.start,10))+parseInt(e.length,10),this.isNumeric(e.start)&&this.isNumeric(e.length)&&r-t>0&&r-n>0?o=!0:(this.log(\"Ignoring invalid or overlapping range: \"+\"\".concat(JSON.stringify(e))),this.opt.noMatch(e))):(this.log(\"Ignoring invalid range: \".concat(JSON.stringify(e))),this.opt.noMatch(e)),{start:n,end:r,valid:o}}},{key:\"checkWhitespaceRanges\",value:function(e,t,n){var r,o=!0,i=n.length,a=t-i,s=parseInt(e.start,10)-a;return(r=(s=s>i?i:s)+parseInt(e.length,10))>i&&(r=i,this.log(\"End range automatically set to the max value of \".concat(i))),s<0||r-s<0||s>i||r>i?(o=!1,this.log(\"Invalid range: 
\".concat(JSON.stringify(e))),this.opt.noMatch(e)):\"\"===n.substring(s,r).replace(/\\s+/g,\"\")&&(o=!1,this.log(\"Skipping whitespace only range: \"+JSON.stringify(e)),this.opt.noMatch(e)),{start:s,end:r,valid:o}}},{key:\"getTextNodes\",value:function(e){var t=this,n=\"\",r=[];this.iterator.forEachNode(NodeFilter.SHOW_TEXT,function(e){r.push({start:n.length,end:(n+=e.textContent).length,node:e})},function(e){return t.matchesExclude(e.parentNode)?NodeFilter.FILTER_REJECT:NodeFilter.FILTER_ACCEPT},function(){e({value:n,nodes:r})})}},{key:\"matchesExclude\",value:function(e){return i.matches(e,this.opt.exclude.concat([\"script\",\"style\",\"title\",\"head\",\"html\"]))}},{key:\"wrapRangeInTextNode\",value:function(e,t,n){var r=this.opt.element?this.opt.element:\"mark\",o=e.splitText(t),i=o.splitText(n-t),a=document.createElement(r);return a.setAttribute(\"data-markjs\",\"true\"),this.opt.className&&a.setAttribute(\"class\",this.opt.className),a.textContent=o.textContent,o.parentNode.replaceChild(a,o),i}},{key:\"wrapRangeInMappedTextNode\",value:function(e,t,n,r,o){var i=this;e.nodes.every(function(a,s){var c=e.nodes[s+1];if(void 0===c||c.start>t){if(!r(a.node))return!1;var u=t-a.start,l=(n>a.end?a.end:n)-a.start,h=e.value.substr(0,a.start),f=e.value.substr(l+a.start);if(a.node=i.wrapRangeInTextNode(a.node,u,l),e.value=h+f,e.nodes.forEach(function(t,n){n>=s&&(e.nodes[n].start>0&&n!==s&&(e.nodes[n].start-=l),e.nodes[n].end-=l)}),n-=l,o(a.node.previousSibling,a.start),!(n>a.end))return!1;t=a.end}return!0})}},{key:\"wrapGroups\",value:function(e,t,n,r){return r((e=this.wrapRangeInTextNode(e,t,t+n)).previousSibling),e}},{key:\"separateGroups\",value:function(e,t,n,r,o){for(var i=t.length,a=1;a<i;a++){var s=e.textContent.indexOf(t[a]);t[a]&&s>-1&&r(t[a],e)&&(e=this.wrapGroups(e,s,t[a].length,o))}return e}},{key:\"wrapMatches\",value:function(e,t,n,r,o){var i=this,a=0===t?0:t+1;this.getTextNodes(function(t){t.nodes.forEach(function(t){var 
o;for(t=t.node;null!==(o=e.exec(t.textContent))&&\"\"!==o[a];){if(i.opt.separateGroups)t=i.separateGroups(t,o,a,n,r);else{if(!n(o[a],t))continue;var s=o.index;if(0!==a)for(var c=1;c<a;c++)s+=o[c].length;t=i.wrapGroups(t,s,o[a].length,r)}e.lastIndex=0}}),o()})}},{key:\"wrapMatchesAcrossElements\",value:function(e,t,n,r,o){var i=this,a=0===t?0:t+1;this.getTextNodes(function(t){for(var s;null!==(s=e.exec(t.value))&&\"\"!==s[a];){var c=s.index;if(0!==a)for(var u=1;u<a;u++)c+=s[u].length;var l=c+s[a].length;i.wrapRangeInMappedTextNode(t,c,l,function(e){return n(s[a],e)},function(t,n){e.lastIndex=n,r(t)})}o()})}},{key:\"wrapRangeFromIndex\",value:function(e,t,n,r){var o=this;this.getTextNodes(function(i){var a=i.value.length;e.forEach(function(e,r){var s=o.checkWhitespaceRanges(e,a,i.value),c=s.start,u=s.end;s.valid&&o.wrapRangeInMappedTextNode(i,c,u,function(n){return t(n,e,i.value.substring(c,u),r)},function(t){n(t,e)})}),r()})}},{key:\"unwrapMatches\",value:function(e){for(var t=e.parentNode,n=document.createDocumentFragment();e.firstChild;)n.appendChild(e.removeChild(e.firstChild));t.replaceChild(n,e),this.ie?this.normalizeTextNode(t):t.normalize()}},{key:\"normalizeTextNode\",value:function(e){if(e){if(3===e.nodeType)for(;e.nextSibling&&3===e.nextSibling.nodeType;)e.nodeValue+=e.nextSibling.nodeValue,e.parentNode.removeChild(e.nextSibling);else this.normalizeTextNode(e.firstChild);this.normalizeTextNode(e.nextSibling)}}},{key:\"markRegExp\",value:function(e,t){var n=this;this.opt=t,this.log('Searching with expression \"'.concat(e,'\"'));var r=0,o=\"wrapMatches\";this.opt.acrossElements&&(o=\"wrapMatchesAcrossElements\"),this[o](e,this.opt.ignoreGroups,function(e,t){return n.opt.filter(t,e,r)},function(e){r++,n.opt.each(e)},function(){0===r&&n.opt.noMatch(e),n.opt.done(r)})}},{key:\"mark\",value:function(e,t){var n=this;this.opt=t;var r=0,o=\"wrapMatches\",i=this.getSeparatedKeywords(\"string\"==typeof 
e?[e]:e),s=i.keywords,c=i.length;this.opt.acrossElements&&(o=\"wrapMatchesAcrossElements\"),0===c?this.opt.done(r):function e(t){var i=new a(n.opt).create(t),u=0;n.log('Searching with expression \"'.concat(i,'\"')),n[o](i,1,function(e,o){return n.opt.filter(o,t,r,u)},function(e){u++,r++,n.opt.each(e)},function(){0===u&&n.opt.noMatch(t),s[c-1]===t?n.opt.done(r):e(s[s.indexOf(t)+1])})}(s[0])}},{key:\"markRanges\",value:function(e,t){var n=this;this.opt=t;var r=0,o=this.checkRanges(e);o&&o.length?(this.log(\"Starting to mark with the following ranges: \"+JSON.stringify(o)),this.wrapRangeFromIndex(o,function(e,t,r,o){return n.opt.filter(e,t,r,o)},function(e,t){r++,n.opt.each(e,t)},function(){n.opt.done(r)})):this.opt.done(r)}},{key:\"unmark\",value:function(e){var t=this;this.opt=e;var n=this.opt.element?this.opt.element:\"*\";n+=\"[data-markjs]\",this.opt.className&&(n+=\".\".concat(this.opt.className)),this.log('Removal selector \"'.concat(n,'\"')),this.iterator.forEachNode(NodeFilter.SHOW_ELEMENT,function(e){t.unwrapMatches(e)},function(e){var r=i.matches(e,n),o=t.matchesExclude(e);return!r||o?NodeFilter.FILTER_REJECT:NodeFilter.FILTER_ACCEPT},this.opt.done)}},{key:\"opt\",set:function(e){this._opt=o({},{element:\"\",className:\"\",exclude:[],iframes:!1,iframesTimeout:5e3,separateWordSearch:!0,acrossElements:!1,ignoreGroups:0,each:function(){},noMatch:function(){},filter:function(){return!0},done:function(){},debug:!1,log:window.console},e)},get:function(){return this._opt}},{key:\"iterator\",get:function(){return new i(this.ctx,this.opt.iframes,this.opt.exclude,this.opt.iframesTimeout)}}]),n}();return function(e){var t=this,n=new s(e);return this.mark=function(e,r){return n.mark(e,r),t},this.markRegExp=function(e,r){return n.markRegExp(e,r),t},this.markRanges=function(e,r){return n.markRanges(e,r),t},this.unmark=function(e){return n.unmark(e),t},this}});\n" +
" </script>\n" +
"</head>";
/**
 * {@link String#format} template for the options object handed to mark.js' {@code unmark}: once unmarking is done,
 * {@code markRegExp} is invoked with the value substituted for {@code %s} (a JavaScript regex literal).
 */
private static final String JS_MARK_REG_EXP_CALLBACK = "" +
"{done: function(){" +
" markInstance.markRegExp(%s);}" +
"}";
/**
 * {@link String#format} template for the script that creates a {@code Mark} instance on the element with id
 * {@code content} and calls {@code unmark}; {@code %s} is replaced by the argument of {@code unmark} (may be empty).
 */
private static final String JS_UNMARK_WITH_CALLBACK = "" +
"var markInstance = new Mark(document.getElementById(\"content\"));" +
"markInstance.unmark(%s);";
/**
 * Matches a forward slash that is not directly preceded by a backslash.
 * The previous expression contained literal double quotes and a two-backslash lookbehind, so it only matched the
 * text {@code "/"} and never a bare slash inside a search pattern.
 * NOTE(review): a slash preceded by an escaped backslash ({@code \\/}) is still treated as escaped by this pattern.
 */
private static final Pattern UNESCAPED_FORWARD_SLASH = Pattern.compile("(?<!\\\\)/");

private final ClipBoardManager clipBoardManager;
private final DialogService dialogService;
Expand Down Expand Up @@ -137,23 +145,27 @@ public void setTheme(Theme theme) {
}

/**
 * Refreshes the search highlighting of the preview by running a single mark.js script in the web engine.
 * Existing marks are always removed; if a search pattern is present, the matches of that pattern are marked again
 * from the {@code done} callback of {@code unmark}.
 */
private void highlightSearchPattern() {
    // An empty callback results in a plain "markInstance.unmark();" that only clears the highlighting.
    String callbackForUnmark = "";
    if (searchHighlightPattern.isPresent()) {
        // Only dereference the Optional when a pattern exists; the pattern is converted to an escaped
        // JavaScript regex literal instead of being spliced into the script verbatim.
        String javaScriptRegex = createJavaScriptRegex(searchHighlightPattern.get());
        callbackForUnmark = String.format(JS_MARK_REG_EXP_CALLBACK, javaScriptRegex);
    }
    // Execute exactly one script, regardless of whether a pattern is present.
    String unmarkInstance = String.format(JS_UNMARK_WITH_CALLBACK, callbackForUnmark);
    previewView.getEngine().executeScript(unmarkInstance);
}

/**
 * Builds the textual form of a JavaScript regular expression literal from a Java pattern.
 * Differences between the Java and the JavaScript regex dialects are not translated.
 *
 * @param regex Java regex whose pattern string is used as the body of the literal
 * @return the literal {@code /<body>/gmi}
 */
private static String createJavaScriptRegex(Pattern regex) {
    // A regular expression literal (https://ecma-international.org/ecma-262/10.0/index.html#sec-literals-regular-expression-literals)
    // is delimited by forward slashes, so every match of UNESCAPED_FORWARD_SLASH in the body is replaced by "\/".
    String escapedBody = UNESCAPED_FORWARD_SLASH.matcher(regex.pattern()).replaceAll("\\\\/");
    return "/" + escapedBody + "/gmi";
}

public void setLayout(PreviewLayout newLayout) {
Expand Down Expand Up @@ -223,7 +235,6 @@ public void print() {
}

public void copyPreviewToClipBoard() {
StringBuilder previewStringContent = new StringBuilder();
Document document = previewView.getEngine().getDocument();

ClipboardContent content = new ClipboardContent();
Expand Down
65 changes: 36 additions & 29 deletions src/main/java/org/jabref/logic/search/SearchQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
Expand All @@ -17,24 +18,41 @@
import org.jabref.model.search.rules.SentenceAnalyzer;

public class SearchQuery implements SearchMatcher {

/**
* Regex pattern for escaping special characters in javascript regular expressions
*/
public static final Pattern JAVASCRIPT_ESCAPED_CHARS_PATTERN = Pattern.compile("[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]");

/**
* The mode of escaping special characters in regular expressions
*/
private enum EscapeMode {
/**
* using \Q and \E marks
*/
JAVA,
JAVA {
@Override
String format(String regex) {
return Pattern.quote(regex);
}
},
/**
* escaping all javascript regex special characters separately
*/
JAVASCRIPT
JAVASCRIPT {
@Override
String format(String regex) {
return JAVASCRIPT_ESCAPED_CHARS_PATTERN.matcher(regex).replaceAll("\\\\$0");
}
};

/**
* Regex pattern for escaping special characters in javascript regular expressions
*/
private static final Pattern JAVASCRIPT_ESCAPED_CHARS_PATTERN = Pattern.compile("[.*+?^${}()|\\[\\]\\\\/]");

/**
* Attempt to escape all regex special characters.
*
* @param regex a string containing a regex expression
* @return a regex with all special characters escaped
*/
abstract String format(String regex);
}

private final String query;
Expand Down Expand Up @@ -128,8 +146,7 @@ public boolean isRegularExpression() {
}

/**
* Returns a list of words this query searches for.
* The returned strings can be a regular expression.
* Returns a list of words this query searches for. The returned strings can be a regular expression.
*/
public List<String> getSearchWords() {
if (isRegularExpression()) {
Expand All @@ -151,7 +168,9 @@ public Optional<Pattern> getJavaScriptPatternForWords() {
return joinWordsToPattern(EscapeMode.JAVASCRIPT);
}

/** Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled
/**
* Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled
*
* @param escapeMode the mode of escaping special characters in wi
*/
private Optional<Pattern> joinWordsToPattern(EscapeMode escapeMode) {
Expand All @@ -162,24 +181,12 @@ private Optional<Pattern> joinWordsToPattern(EscapeMode escapeMode) {
}

// compile the words to a regular expression in the form (w1)|(w2)|(w3)
StringJoiner joiner = new StringJoiner(")|(", "(", ")");
for (String word : words) {
if (regularExpression) {
joiner.add(word);
} else {
switch (escapeMode) {
case JAVA:
joiner.add(Pattern.quote(word));
break;
case JAVASCRIPT:
joiner.add(JAVASCRIPT_ESCAPED_CHARS_PATTERN.matcher(word).replaceAll("\\\\$0"));
break;
default:
throw new IllegalArgumentException("Unknown special characters escape mode: " + escapeMode);
}
}
Stream<String> joiner = words.stream();
if (!regularExpression) {
// Reformat string when we are looking for a literal match
joiner = joiner.map(escapeMode::format);
}
String searchPattern = joiner.toString();
String searchPattern = joiner.collect(Collectors.joining(")|(", "(", ")"));

if (caseSensitive) {
return Optional.of(Pattern.compile(searchPattern));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ public Boolean visitComparison(SearchParser.ComparisonContext context) {
if (fieldDescriptor.isPresent()) {
return comparison(fieldDescriptor.get().getText(), ComparisonOperator.build(context.operator.getText()), right);
} else {
return new ContainBasedSearchRule(caseSensitive).applyRule(right, entry);
return SearchRules.getSearchRule(caseSensitive, regex).applyRule(right, entry);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ private static boolean isSimpleQuery(String query) {
return SIMPLE_EXPRESSION.matcher(query).matches();
}

private static SearchRule getSearchRule(boolean caseSensitive, boolean regex) {
static SearchRule getSearchRule(boolean caseSensitive, boolean regex) {
if (regex) {
return new RegexBasedSearchRule(caseSensitive);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Test case for ContainBasedSearchRule.
Expand All @@ -23,32 +24,31 @@ public void testBasicSearchParsing() {

String query = "marine 2001 shields";

assertEquals(false, bsCaseSensitive.applyRule(query, be));
assertEquals(true, bsCaseInsensitive.applyRule(query, be));
assertEquals(false, bsCaseSensitiveRegexp.applyRule(query, be));
assertEquals(false, bsCaseInsensitiveRegexp.applyRule(query, be));
assertFalse(bsCaseSensitive.applyRule(query, be));
assertTrue(bsCaseInsensitive.applyRule(query, be));
assertFalse(bsCaseSensitiveRegexp.applyRule(query, be));
assertFalse(bsCaseInsensitiveRegexp.applyRule(query, be));

query = "\"marine larviculture\"";

assertEquals(false, bsCaseSensitive.applyRule(query, be));
assertEquals(false, bsCaseInsensitive.applyRule(query, be));
assertEquals(false, bsCaseSensitiveRegexp.applyRule(query, be));
assertEquals(false, bsCaseInsensitiveRegexp.applyRule(query, be));
assertFalse(bsCaseSensitive.applyRule(query, be));
assertFalse(bsCaseInsensitive.applyRule(query, be));
assertFalse(bsCaseSensitiveRegexp.applyRule(query, be));
assertFalse(bsCaseInsensitiveRegexp.applyRule(query, be));

query = "marine [A-Za-z]* larviculture";

assertEquals(false, bsCaseSensitive.applyRule(query, be));
assertEquals(false, bsCaseInsensitive.applyRule(query, be));
assertEquals(false, bsCaseSensitiveRegexp.applyRule(query, be));
assertEquals(true, bsCaseInsensitiveRegexp.applyRule(query, be));
assertFalse(bsCaseSensitive.applyRule(query, be));
assertFalse(bsCaseInsensitive.applyRule(query, be));
assertFalse(bsCaseSensitiveRegexp.applyRule(query, be));
assertTrue(bsCaseInsensitiveRegexp.applyRule(query, be));
}

/**
 * Creates the entry used as fixture by the tests of this class: an {@code InCollection} entry with citation key
 * {@code shields01} and the fields title, year and author set.
 *
 * @return a newly created entry
 */
public BibEntry makeBibtexEntry() {
    // Single fluent construction; the setter-based variant followed by a second return statement was unreachable code.
    return new BibEntry(StandardEntryType.InCollection)
            .withCitationKey("shields01")
            .withField(StandardField.TITLE, "Marine finfish larviculture in Europe")
            .withField(StandardField.YEAR, "2001")
            .withField(StandardField.AUTHOR, "Kevin Shields");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.jabref.model.search.rules;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Test case for GrammarBasedSearchRule.
 */
public class GrammarBasedSearchRuleTest {

    /**
     * A single-term regex query that is valid and applies to the fixture entry.
     */
    @Test
    void applyRuleMatchesSingleTermWithRegex() {
        // presumably (caseSensitive, regex) - confirm against the GrammarBasedSearchRule constructor
        GrammarBasedSearchRule rule = new GrammarBasedSearchRule(true, true);
        String regexQuery = "M[a-z]+e";

        boolean valid = rule.validateSearchStrings(regexQuery);
        assertTrue(valid);

        BibEntry entry = makeBibtexEntry();
        boolean matched = rule.applyRule(regexQuery, entry);
        assertTrue(matched);
    }

    /**
     * A single-term regex query that is valid but does not apply to the fixture entry.
     */
    @Test
    void applyRuleDoesNotMatchSingleTermWithRegex() {
        GrammarBasedSearchRule rule = new GrammarBasedSearchRule(true, true);
        String regexQuery = "M[0-9]+e";

        boolean valid = rule.validateSearchStrings(regexQuery);
        assertTrue(valid);

        BibEntry entry = makeBibtexEntry();
        boolean matched = rule.applyRule(regexQuery, entry);
        assertFalse(matched);
    }

    /**
     * Builds the fixture entry shared by the tests above.
     *
     * @return an {@code InCollection} entry with citation key, title, year and author set
     */
    public BibEntry makeBibtexEntry() {
        BibEntry fixture = new BibEntry(StandardEntryType.InCollection)
                .withCitationKey("shields01")
                .withField(StandardField.TITLE, "Marine finfish larviculture in Europe")
                .withField(StandardField.YEAR, "2001")
                .withField(StandardField.AUTHOR, "Kevin Shields");
        return fixture;
    }
}

0 comments on commit 6f35c36

Please sign in to comment.