diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 00a1964c9501..904a4f5b0471 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1877,7 +1877,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging IntervalUtils.fromString(value) } catch { case e: IllegalArgumentException => - val ex = new ParseException("Cannot parse the INTERVAL value: " + value, ctx) + val ex = new ParseException(s"Cannot parse the INTERVAL value: $value\n" + + e.getMessage, ctx) ex.setStackTrace(e.getStackTrace) throw ex } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index b66cae797941..76c703737b97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -36,14 +36,6 @@ import org.apache.spark.unsafe.types.CalendarInterval */ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Logging { - /** - * Creates [[CalendarInterval]] for a given SQL String. Throws [[ParseException]] if the SQL - * string is not a valid interval format. - */ - def parseInterval(sqlText: String): CalendarInterval = parse(sqlText) { parser => - astBuilder.visitSingleInterval(parser.singleInterval()) - } - /** Creates/Resolves DataType for a given SQL string. 
*/ override def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => astBuilder.visitSingleDataType(parser.singleDataType()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 19bb44f1e48a..67b55c46d86c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.catalyst.util +import java.util.Locale import java.util.concurrent.TimeUnit import scala.util.control.NonFatal -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -101,34 +101,6 @@ object IntervalUtils { Decimal(result, 18, 6) } - /** - * Converts a string to [[CalendarInterval]] case-insensitively. - * - * @throws IllegalArgumentException if the input string is not in valid interval format. - */ - def fromString(str: String): CalendarInterval = { - if (str == null) throw new IllegalArgumentException("Interval string cannot be null") - try { - CatalystSqlParser.parseInterval(str) - } catch { - case e: ParseException => - val ex = new IllegalArgumentException(s"Invalid interval string: $str\n" + e.message) - ex.setStackTrace(e.getStackTrace) - throw ex - } - } - - /** - * A safe version of `fromString`. It returns null for invalid input string. 
-   */
-  def safeFromString(str: String): CalendarInterval = {
-    try {
-      fromString(str)
-    } catch {
-      case _: IllegalArgumentException => null
-    }
-  }
-
   private def toLongWithRange(
       fieldName: IntervalUnit,
       s: String,
@@ -250,6 +222,101 @@ object IntervalUtils {
     }
   }
 
+  /**
+   * Compiles `regex` a single time and returns a predicate that is true only when the whole
+   * input string matches it.
+   */
+  private def unitMatcher(regex: String): String => Boolean = {
+    val pattern = regex.r.pattern
+    unit => pattern.matcher(unit).matches()
+  }
+
+  // Accepted spellings of each interval unit. The patterns are lower-case only, so
+  // `fromString` lower-cases its input before consulting them.
+  private val isYear: String => Boolean =
+    unitMatcher("""y((r)|(rs)|(ear)|(ears))?""")
+  private val isMonth: String => Boolean =
+    unitMatcher("""mon((s)|(th)|(ths))?""")
+  private val isWeek: String => Boolean =
+    unitMatcher("""w((eek)|(eeks))?""")
+  private val isDay: String => Boolean =
+    unitMatcher("""d((ay)|(ays))?""")
+  private val isHour: String => Boolean =
+    unitMatcher("""h((r)|(rs)|(our)|(ours))?""")
+  private val isMinute: String => Boolean =
+    unitMatcher("""m((in)|(ins)|(inute)|(inutes))?""")
+  private val isSecond: String => Boolean =
+    unitMatcher("""s((ec)|(ecs)|(econd)|(econds))?""")
+  private val isMs: String => Boolean =
+    unitMatcher("""(ms((ec)|(ecs)|(econds))?|(millisecond)[s]?)""")
+  private val isUs: String => Boolean =
+    unitMatcher("""(us((ec)|(ecs)|(econds))?|(microsecond)[s]?)""")
+
+  /**
+   * Converts a string with multiple value unit pairs to [[CalendarInterval]] case-insensitively.
+   *
+   * The input is an optional `interval ` prefix followed by whitespace-separated
+   * `value unit` pairs, e.g. `interval 1 year -2 mons 3.5 seconds`. A minus sign may be
+   * separated from its number by whitespace and a standalone `+` is ignored. Repeated units
+   * are summed. Years and months are accumulated into months, weeks and days into days, and
+   * all smaller units into microseconds.
+   *
+   * @param str the interval string to parse
+   * @return the parsed [[CalendarInterval]]
+   * @throws IllegalArgumentException if the input string is not in valid interval format.
+   */
+  def fromString(str: String): CalendarInterval = {
+    if (str == null) throw new IllegalArgumentException("Interval multi unit string cannot be null")
+    // Lower-case first so the optional `interval ` prefix is matched in any letter case.
+    val unitValuePart = str.trim.toLowerCase(Locale.ROOT).stripPrefix("interval ").trim
+    // Attach a detached minus sign to its number (`- 3 w` => `-3 w`) and drop standalone `+`.
+    val tokens = "-\\s+".r.replaceAllIn(unitValuePart, "-").split("\\s+").filter(_ != "+")
+    require(tokens.nonEmpty && tokens.length % 2 == 0,
+      "Interval string should be value and unit pairs")
+
+    var months: Int = 0
+    var days: Int = 0
+    var us: Long = 0L
+    try {
+      // `require` above guarantees an even count, so each group is a (value, unit) pair.
+      tokens.grouped(2).foreach { pair =>
+        val valueStr = pair(0)
+        val unit = pair(1)
+        if (isYear(unit)) {
+          months = Math.addExact(months, Math.multiplyExact(valueStr.toInt, MONTHS_PER_YEAR))
+        } else if (isMonth(unit)) {
+          months = Math.addExact(months, valueStr.toInt)
+        } else if (isWeek(unit)) {
+          days = Math.addExact(days, Math.multiplyExact(valueStr.toInt, DAYS_PER_WEEK))
+        } else if (isDay(unit)) {
+          days = Math.addExact(days, valueStr.toInt)
+        } else if (isHour(unit)) {
+          us = Math.addExact(us, Math.multiplyExact(valueStr.toLong, MICROS_PER_HOUR))
+        } else if (isMinute(unit)) {
+          us = Math.addExact(us, Math.multiplyExact(valueStr.toLong, MICROS_PER_MINUTE))
+        } else if (isSecond(unit)) {
+          us = Math.addExact(us, parseSecondNano(valueStr))
+        } else if (isMs(unit)) {
+          us = Math.addExact(us, Math.multiplyExact(valueStr.toLong, MICROS_PER_MILLIS))
+        } else if (isUs(unit)) {
+          us = Math.addExact(us, valueStr.toLong)
+        } else {
+          // NOTE(review): "paring" is a typo for "parsing"; it is kept verbatim because the
+          // golden file interval.sql.out pins this exact message. Fix both together.
+          throw new IllegalArgumentException(s"Error paring interval unit: $unit")
+        }
+      }
+      new CalendarInterval(months, days, us)
+    } catch {
+      // NumberFormatException is an IllegalArgumentException, so malformed numbers propagate
+      // unchanged here rather than being wrapped below.
+      case e: IllegalArgumentException => throw e
+      // Anything else (e.g. ArithmeticException from the exact arithmetic) is wrapped.
+      case e: Exception =>
+        throw new IllegalArgumentException(s"Invalid interval string: $str\n" + e.getMessage, e)
+    }
+  }
+
   def fromUnitStrings(units: Array[IntervalUnit], values: Array[String]): CalendarInterval = {
     assert(units.length == values.length)
     var months: Int = 0
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 75b0afceca14..d7ab43779573 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -68,9 +68,11 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromInvalidString(null, "cannot be null") - for (input <- Seq("", " ", "interval", "interval1 day", "foo", "foo 1 day")) { - checkFromInvalidString(input, "Invalid interval string") + for (input <- Seq("", " ", "interval", "foo", "foo 1 day")) { + checkFromInvalidString(input, "Interval string should be value and unit pairs") } + + checkFromInvalidString("interval1 day", "For input string") } @@ -79,7 +81,7 @@ class IntervalUtilsSuite extends SparkFunSuite { "-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1), " 123 MONTHS 123 DAYS 123 Microsecond " -> new CalendarInterval(123, 123, 123), "interval -1 day +3 Microseconds" -> new CalendarInterval(0, -1, 3), - " interval 8 years -11 months 123 weeks -1 day " + + "interval 8 years -11 months 123 weeks -1 day " + "23 hours -22 minutes 1 second -123 millisecond 567 microseconds " -> new CalendarInterval(85, 860, 81480877567L)).foreach { case (input, expected) => checkFromString(input, expected) @@ -92,8 +94,8 @@ class IntervalUtilsSuite extends SparkFunSuite { // Allow duplicated units and summarize their values checkFromString("1 day 10 day", new CalendarInterval(0, 11, 0)) // Only the seconds units can have the fractional part - checkFromInvalidString("1.5 days", "Error parsing interval string") - checkFromInvalidString("1. hour", "Error parsing interval string") + checkFromInvalidString("1.5 days", "1.5") + checkFromInvalidString("1. 
hour", "1.") } test("string to interval: seconds with fractional part") { @@ -105,7 +107,7 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromString("-1.5 seconds", new CalendarInterval(0, 0, -1500000)) // truncate nanoseconds to microseconds checkFromString("0.999999999 seconds", new CalendarInterval(0, 0, 999999)) - checkFromInvalidString("0.123456789123 seconds", "Error parsing interval string") + checkFromInvalidString("0.123456789123 seconds", "outside range [0, 999999999]") } test("from year-month string") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 2163a128aacf..41b3f556675e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -65,3 +65,23 @@ select make_interval(1, 2, 3, 4); select make_interval(1, 2, 3, 4, 5); select make_interval(1, 2, 3, 4, 5, 6); select make_interval(1, 2, 3, 4, 5, 6, 7.008009); + +-- abbreviations support for interval multi unit string +select interval '1 years 2 months 3 weeks 4 days 5 hours 6 minutes 7 seconds 8 milliseconds 9 microseconds'; +select interval '1 year 2 month 3 week 4 day 5 hour 6 minute 7 second 8 millisecond 9 microsecond'; +select interval '1 y 2 mon 3 w 4 d 5 h 6 m 7 s 8 ms 9 us'; +select interval '1 y 2 yr -1 yrs 3 years -2 mons - 3 w + 4 d +5 hrs 6 mins 7 secs 8 msecs 9 usecs'; +select interval 'day 1 y'; +select interval '1 1 y'; +select interval '1 1 1 y'; +select interval '1 daY'; +select interval ''; +select interval ' '; +select interval '-'; +select interval '- year'; +select interval '-1 yea'; + +-- abbreviations not cover these which require units as keywords in sql parser +select interval 1 y 2 mon 3 week; +select interval '30' y '25' mon; +select interval '-1 yrs' year to month; diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
index 355a76d56559..01b0462dc5b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 38 +-- Number of queries: 54 -- !query 0 @@ -306,3 +306,205 @@ select make_interval(1, 2, 3, 4, 5, 6, 7.008009) struct -- !query 37 output 1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 38 +select interval '1 years 2 months 3 weeks 4 days 5 hours 6 minutes 7 seconds 8 milliseconds 9 microseconds' +-- !query 38 schema +struct<1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds:interval> +-- !query 38 output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 39 +select interval '1 year 2 month 3 week 4 day 5 hour 6 minute 7 second 8 millisecond 9 microsecond' +-- !query 39 schema +struct<1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds:interval> +-- !query 39 output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 40 +select interval '1 y 2 mon 3 w 4 d 5 h 6 m 7 s 8 ms 9 us' +-- !query 40 schema +struct<1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds:interval> +-- !query 40 output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 41 +select interval '1 y 2 yr -1 yrs 3 years -2 mons - 3 w + 4 d +5 hrs 6 mins 7 secs 8 msecs 9 usecs' +-- !query 41 schema +struct<4 years 10 months -17 days 5 hours 6 minutes 7.008009 seconds:interval> +-- !query 41 output +4 years 10 months -17 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 42 +select interval 'day 1 y' +-- !query 42 schema +struct<> +-- !query 42 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: day 1 y +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval 'day 1 y' +-------^^^ + + +-- !query 43 +select interval '1 
1 y' +-- !query 43 schema +struct<> +-- !query 43 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: 1 1 y +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval '1 1 y' +-------^^^ + + +-- !query 44 +select interval '1 1 1 y' +-- !query 44 schema +struct<> +-- !query 44 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: 1 1 1 y +Error paring interval unit: 1(line 1, pos 7) + +== SQL == +select interval '1 1 1 y' +-------^^^ + + +-- !query 45 +select interval '1 daY' +-- !query 45 schema +struct<1 days:interval> +-- !query 45 output +1 days + + +-- !query 46 +select interval '' +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval '' +-------^^^ + + +-- !query 47 +select interval ' ' +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval ' ' +-------^^^ + + +-- !query 48 +select interval '-' +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: - +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval '-' +-------^^^ + + +-- !query 49 +select interval '- year' +-- !query 49 schema +struct<> +-- !query 49 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: - year +requirement failed: Interval string should be value and unit pairs(line 1, pos 7) + +== SQL == +select interval '- year' +-------^^^ + + +-- !query 50 +select interval '-1 yea' +-- 
!query 50 schema +struct<> +-- !query 50 output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: -1 yea +Error paring interval unit: yea(line 1, pos 7) + +== SQL == +select interval '-1 yea' +-------^^^ + + +-- !query 51 +select interval 1 y 2 mon 3 week +-- !query 51 schema +struct<> +-- !query 51 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input '1 y'(line 1, pos 18) + +== SQL == +select interval 1 y 2 mon 3 week +------------------^^^ + + +-- !query 52 +select interval '30' y '25' mon +-- !query 52 schema +struct<> +-- !query 52 output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input ''25'' expecting {, ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 1, pos 23) + +== SQL == +select interval '30' y '25' mon +-----------------------^^^ + + +-- !query 53 +select interval '-1 yrs' year to month +-- !query 53 schema +struct<> +-- !query 53 output +org.apache.spark.sql.catalyst.parser.ParseException + +Interval string does not match year-month format of 'y-m': -1 yrs(line 1, pos 16) + +== SQL == +select interval '-1 yrs' year to month +----------------^^^