From ea42bbda92aead99fe8ad91e33e57c0ba573920b Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 11 Nov 2020 18:28:58 +0800 Subject: [PATCH 1/2] init --- .../expressions/stringExpressions.scala | 36 +++++++++---------- .../expressions/StringExpressionsSuite.scala | 1 + 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 1fe990207160..00576363b5cb 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.net.{URI, URISyntaxException} +import java.net.{MalformedURLException, URL} import java.text.{BreakIterator, DecimalFormat, DecimalFormatSymbols} import java.util.{HashMap, Locale, Map => JMap} import java.util.regex.Pattern @@ -1373,15 +1373,15 @@ case class ParseUrl(children: Seq[Expression]) Pattern.compile(REGEXPREFIX + key.toString + REGEXSUBFIX) } - private def getUrl(url: UTF8String): URI = { + private def getUrl(url: UTF8String): URL = { try { - new URI(url.toString) + new URL(url.toString) } catch { - case e: URISyntaxException => null + case _: MalformedURLException => null } } - private def getExtractPartFunc(partToExtract: UTF8String): URI => String = { + private def getExtractPartFunc(partToExtract: UTF8String): URL => String = { // partToExtract match { // case HOST => _.toURL().getHost @@ -1392,25 +1392,25 @@ case class ParseUrl(children: Seq[Expression]) // case FILE => _.toURL().getFile // case AUTHORITY => _.toURL().getAuthority // case USERINFO => _.toURL().getUserInfo - // case _ => (url: URI) => null + // case _ => (url: URL) => null // } partToExtract match { case HOST => _.getHost - case PATH => _.getRawPath - case 
QUERY => _.getRawQuery - case REF => _.getRawFragment - case PROTOCOL => _.getScheme + case PATH => _.getPath + case QUERY => _.getQuery + case REF => _.getRef + case PROTOCOL => _.getProtocol case FILE => - (url: URI) => - if (url.getRawQuery ne null) { - url.getRawPath + "?" + url.getRawQuery + (url: URL) => + if (url.getPath ne null) { + url.getPath + "?" + url.getQuery } else { - url.getRawPath + url.getPath } - case AUTHORITY => _.getRawAuthority - case USERINFO => _.getRawUserInfo - case _ => (url: URI) => null + case AUTHORITY => _.getAuthority + case USERINFO => _.getUserInfo + case _ => (url: URL) => null } } @@ -1423,7 +1423,7 @@ case class ParseUrl(children: Seq[Expression]) } } - private def extractFromUrl(url: URI, partToExtract: UTF8String): UTF8String = { + private def extractFromUrl(url: URL, partToExtract: UTF8String): UTF8String = { if (cachedExtractPartFunc ne null) { UTF8String.fromString(cachedExtractPartFunc.apply(url)) } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 967ccc42c632..de549589b6bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -904,6 +904,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } checkParseUrl("spark.apache.org", "http://spark.apache.org/path?query=1", "HOST") + checkParseUrl("a.b.c", "https://a.b.c/index.php?params1=a|b&params2=x", "HOST") checkParseUrl("/path", "http://spark.apache.org/path?query=1", "PATH") checkParseUrl("query=1", "http://spark.apache.org/path?query=1", "QUERY") checkParseUrl("Ref", "http://spark.apache.org/path?query=1#Ref", "REF") From f62e88ba760283ecba2773a2fa97e6a8b361fa92 Mon Sep 17 00:00:00 2001 From: ulysses Date: 
Wed, 11 Nov 2020 18:58:43 +0800 Subject: [PATCH 2/2] init --- .../sql/catalyst/expressions/stringExpressions.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 00576363b5cb..b8da3060f3cc 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1401,13 +1401,7 @@ case class ParseUrl(children: Seq[Expression]) case QUERY => _.getQuery case REF => _.getRef case PROTOCOL => _.getProtocol - case FILE => - (url: URL) => - if (url.getPath ne null) { - url.getPath + "?" + url.getQuery - } else { - url.getPath - } + case FILE => _.getFile case AUTHORITY => _.getAuthority case USERINFO => _.getUserInfo case _ => (url: URL) => null