Skip to content

Commit

Permalink
HBASE-27219 Change JONI encoding in RegexStringComparator (#4632)
Browse files Browse the repository at this point in the history
Signed-off-by: Andrew Purtell <apurtell@apache.org>
  • Loading branch information
mwkang authored and apurtell committed Jul 19, 2022
1 parent 25c375e commit ee56bca
Showing 1 changed file with 4 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.yetus.audience.InterfaceAudience;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.specific.NonStrictUTF8Encoding;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
Expand Down Expand Up @@ -312,7 +312,9 @@ public byte[] toByteArray() {
* NOTE: Only the {@link Pattern} flags CASE_INSENSITIVE, DOTALL, and MULTILINE are supported.
*/
static class JoniRegexEngine implements Engine {
private Encoding encoding = UTF8Encoding.INSTANCE;
// When using UTF8Encoding, an infinite loop can occur if an invalid UTF-8 byte sequence is
// encountered. Use NonStrictUTF8Encoding instead of UTF8Encoding to avoid the issue.
private Encoding encoding = NonStrictUTF8Encoding.INSTANCE;
private String regex;
private Regex pattern;

Expand Down

0 comments on commit ee56bca

Please sign in to comment.