diff --git a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java index 4a41608d26..5e725c1c0e 100644 --- a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java +++ b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java @@ -40,6 +40,7 @@ import net.sf.jsqlparser.statement.insert.Insert; import net.sf.jsqlparser.statement.update.Update; import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang3.StringEscapeUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,8 +53,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Objects; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * @author jiangbo @@ -65,8 +64,6 @@ public class LogParser { public static SnowflakeIdWorker idWorker = new SnowflakeIdWorker(1, 1); - public static final Pattern pattern = Pattern.compile("(\\\\u(\\w{4}))"); - private final LogMinerConf config; public LogParser(LogMinerConf config) { @@ -206,26 +203,9 @@ public static String parseTime(String value) { return value.substring(17, value.length() - 2); } - // support nchar、nvarchar2 chinese value - if (value.startsWith("UNISTR('") && value.endsWith("')")) { - String substring = value.substring(8, value.length() - 2); - String replace = substring.replace("\\", "\\u"); - return unicodeToString(replace); - } - return value; } - public static String unicodeToString(String str) { - Matcher matcher = pattern.matcher(str); - char ch; - while (matcher.find()) { - ch = (char) Integer.parseInt(matcher.group(2), 16); - str = str.replace(matcher.group(1), String.valueOf(ch)); - } - return str; - } - public static String parseString(String value) { if (!value.endsWith("')")) { return value; @@ -252,6 +232,13 @@ public static String parseString(String value) { return value.substring(15, value.length() - 2); } + // support nchar、nvarchar2 chinese value + if (value.startsWith("UNISTR('") && value.endsWith("')")) { + String substring = value.substring(8, value.length() - 2); + String replace = substring.replace("\\", "\\u"); + return StringEscapeUtils.unescapeJava(replace); + } + return value; } diff --git a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java index 13eb75b08c..f1923657cc 100644 --- a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java +++ b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java @@ -18,20 +18,16 @@ package com.dtstack.chunjun.util; +import org.apache.commons.lang3.StringEscapeUtils; import org.junit.Assert; import org.junit.Test; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - /** * @author liuche * @date 2022/8/26 14:39 */ public class UnicodeToStringTest { - public static final Pattern pattern = Pattern.compile("(\\\\u(\\w{4}))"); - @Test public void testUnicodeToString() { String str = "UNISTR('\\5927\\6D77')"; @@ -39,18 +35,8 @@ public void testUnicodeToString() { if (str.startsWith("UNISTR('") && str.endsWith("')")) { String substring = str.substring(8, str.length() - 2); String replace = substring.replace("\\", "\\u"); - str = unicodeToString(replace); + str = StringEscapeUtils.unescapeJava(replace); } Assert.assertEquals(str, "大海"); } - - public static String unicodeToString(String str) { - Matcher matcher = pattern.matcher(str); - char ch; - while (matcher.find()) { - ch = (char) Integer.parseInt(matcher.group(2), 16); - str = str.replace(matcher.group(1), String.valueOf(ch)); - } - return str; - } }