Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,8 @@ object FunctionRegistry {
expression[FormatString]("format_string"),
expression[ToNumber]("to_number"),
expression[TryToNumber]("try_to_number"),
expression[ToCharacter]("to_char"),
expression[ToCharacter]("to_varchar", setAlias = true, Some("3.5.0")),
expressionBuilder("to_char", ToCharacterBuilder),
expressionBuilder("to_varchar", ToCharacterBuilder, setAlias = true, Some("3.5.0")),
expression[GetJsonObject]("get_json_object"),
expression[InitCap]("initcap"),
expression[StringInstr]("instr"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ package org.apache.spark.sql.catalyst.expressions

import java.util.Locale

import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, TypeCheckResult}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.expressions.Cast._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper
import org.apache.spark.sql.catalyst.util.ToNumberParser
import org.apache.spark.sql.types.{AbstractDataType, DataType, Decimal, DecimalType, StringType}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.{AbstractDataType, DataType, DatetimeType, Decimal, DecimalType, StringType}
import org.apache.spark.unsafe.types.UTF8String

abstract class ToNumberBase(left: Expression, right: Expression, errorOnFail: Boolean)
Expand Down Expand Up @@ -181,12 +182,13 @@ case class TryToNumber(left: Expression, right: Expression)
}

/**
* A function that converts decimal values to strings, returning NULL if the decimal value fails to
* A function that converts decimal/datetime values to strings, returning NULL if the value fails to
* match the format string.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """
_FUNC_(numberExpr, formatExpr) - Convert `numberExpr` to a string based on the `formatExpr`.
_FUNC_(expr, format) - Convert `expr` to a string based on the `format`.
Throws an exception if the conversion fails. The format can consist of the following
characters, case insensitive:
'0' or '9': Specifies an expected digit between 0 and 9. A sequence of 0 or 9 in the format
Expand All @@ -206,6 +208,7 @@ case class TryToNumber(left: Expression, right: Expression)
'PR': Only allowed at the end of the format string; specifies that the result string will be
wrapped by angle brackets if the input value is negative.
('<1>').
If `expr` is a datetime, `format` shall be a valid datetime pattern, see <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a>.
""",
examples = """
Examples:
Expand All @@ -219,9 +222,27 @@ case class TryToNumber(left: Expression, right: Expression)
$78.12
> SELECT _FUNC_(-12454.8, '99G999D9S');
12,454.8-
> SELECT _FUNC_(date'2016-04-08', 'y');
2016
""",
since = "3.4.0",
group = "string_funcs")
// scalastyle:on line.size.limit
object ToCharacterBuilder extends ExpressionBuilder {
  /**
   * Builds the expression backing a two-argument call registered under `funcName`.
   *
   * The first argument's data type selects the implementation: a `DatetimeType`
   * input is routed to `DateFormatClass`, every other input type to `ToCharacter`.
   * The second argument is passed through unchanged as the format expression.
   *
   * @param funcName    name the function was invoked under; used only for error reporting
   * @param expressions the call's argument expressions; exactly two are expected
   * @return the expression that performs the conversion
   * @throws Throwable the error produced by `QueryCompilationErrors.wrongNumArgsError`
   *                   when the number of arguments is not two
   */
  override def build(funcName: String, expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(valueExpr, formatExpr) =>
        // Dispatch on the input's type, not on the format argument.
        valueExpr.dataType match {
          case _: DatetimeType => DateFormatClass(valueExpr, formatExpr)
          case _ => ToCharacter(valueExpr, formatExpr)
        }
      case _ =>
        throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(2), expressions.length)
    }
}

case class ToCharacter(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
private lazy val numberFormatter = {
Expand Down
8 changes: 2 additions & 6 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4405,9 +4405,7 @@ object functions {
* @group string_funcs
* @since 3.5.0
*/
def to_char(e: Column, format: Column): Column = withExpr {
ToCharacter(e.expr, format.expr)
}
def to_char(e: Column, format: Column): Column = call_function("to_char", e, format)

/**
* Convert `e` to a string based on the `format`.
Expand All @@ -4433,9 +4431,7 @@ object functions {
* @group string_funcs
* @since 3.5.0
*/
def to_varchar(e: Column, format: Column): Column = withExpr {
ToCharacter(e.expr, format.expr)
}
def to_varchar(e: Column, format: Column): Column = to_char(e, format)

/**
* Convert string 'e' to a number based on the string format 'format'.
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,65 @@ create temporary view v as select col from values
(timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
(timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col);

select col, date_format(col, 'G GG GGG GGGG') from v;
select col, date_format(col, 'G GG GGG GGGG'), to_char(col, 'G GG GGG GGGG'), to_varchar(col, 'G GG GGG GGGG') from v;

select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy') from v;
select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy'), to_char(col, 'y yy yyy yyyy yyyyy yyyyyy'), to_varchar(col, 'y yy yyy yyyy yyyyy yyyyyy') from v;

select col, date_format(col, 'q qq') from v;
select col, date_format(col, 'q qq'), to_char(col, 'q qq'), to_varchar(col, 'q qq') from v;

select col, date_format(col, 'Q QQ QQQ QQQQ') from v;
select col, date_format(col, 'Q QQ QQQ QQQQ'), to_char(col, 'Q QQ QQQ QQQQ'), to_varchar(col, 'Q QQ QQQ QQQQ') from v;

select col, date_format(col, 'M MM MMM MMMM') from v;
select col, date_format(col, 'M MM MMM MMMM'), to_char(col, 'M MM MMM MMMM'), to_varchar(col, 'M MM MMM MMMM') from v;

select col, date_format(col, 'L LL') from v;
select col, date_format(col, 'L LL'), to_char(col, 'L LL'), to_varchar(col, 'L LL') from v;

select col, date_format(col, 'E EE EEE EEEE') from v;
select col, date_format(col, 'E EE EEE EEEE'), to_char(col, 'E EE EEE EEEE'), to_varchar(col, 'E EE EEE EEEE') from v;

select col, date_format(col, 'F') from v;
select col, date_format(col, 'F'), to_char(col, 'F'), to_varchar(col, 'F') from v;

select col, date_format(col, 'd dd') from v;
select col, date_format(col, 'd dd'), to_char(col, 'd dd'), to_varchar(col, 'd dd') from v;

select col, date_format(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles';
select col, date_format(col, 'D DDD') from v;
select col, date_format(col, 'DD'), to_char(col, 'DD'), to_varchar(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles';
select col, date_format(col, 'D DDD'), to_char(col, 'D DDD'), to_varchar(col, 'D DDD') from v;

select col, date_format(col, 'H HH') from v;
select col, date_format(col, 'H HH'), to_char(col, 'H HH'), to_varchar(col, 'H HH') from v;

select col, date_format(col, 'h hh') from v;
select col, date_format(col, 'h hh'), to_char(col, 'h hh'), to_varchar(col, 'h hh') from v;

select col, date_format(col, 'k kk') from v;
select col, date_format(col, 'k kk'), to_char(col, 'k kk'), to_varchar(col, 'k kk') from v;

select col, date_format(col, 'K KK') from v;
select col, date_format(col, 'K KK'), to_char(col, 'K KK'), to_varchar(col, 'K KK') from v;

select col, date_format(col, 'm mm') from v;
select col, date_format(col, 'm mm'), to_char(col, 'm mm'), to_varchar(col, 'm mm') from v;

select col, date_format(col, 's ss') from v;
select col, date_format(col, 's ss'), to_char(col, 's ss'), to_varchar(col, 's ss') from v;

select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v;
select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS'), to_char(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS'), to_varchar(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v;

select col, date_format(col, 'a') from v;
select col, date_format(col, 'a'), to_char(col, 'a'), to_varchar(col, 'a') from v;

select col, date_format(col, 'VV') from v;
select col, date_format(col, 'VV'), to_char(col, 'VV'), to_varchar(col, 'VV') from v;

select col, date_format(col, 'z zz zzz zzzz') from v;
select col, date_format(col, 'z zz zzz zzzz'), to_char(col, 'z zz zzz zzzz'), to_varchar(col, 'z zz zzz zzzz') from v;

select col, date_format(col, 'X XX XXX') from v;
select col, date_format(col, 'XXXX XXXXX') from v;
select col, date_format(col, 'X XX XXX'), to_char(col, 'X XX XXX'), to_varchar(col, 'X XX XXX') from v;
select col, date_format(col, 'XXXX XXXXX'), to_char(col, 'XXXX XXXXX'), to_varchar(col, 'XXXX XXXXX') from v;

select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v;
select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ'), to_char(col, 'Z ZZ ZZZ ZZZZ ZZZZZ'), to_varchar(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v;

select col, date_format(col, 'O OOOO') from v;
select col, date_format(col, 'O OOOO'), to_char(col, 'O OOOO'), to_varchar(col, 'O OOOO') from v;

select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx') from v;
select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx'), to_char(col, 'x xx xxx xxxx xxxx xxxxx'), to_varchar(col, 'x xx xxx xxxx xxxx xxxxx') from v;

-- optional pattern, but the results won't be optional for formatting
select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]') from v;
select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]'), to_char(col, '[yyyy-MM-dd HH:mm:ss]'), to_varchar(col, '[yyyy-MM-dd HH:mm:ss]') from v;

-- literals
select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v;
select col, date_format(col, "''") from v;
select col, date_format(col, '') from v;
select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'"), to_char(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'"), to_varchar(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v;
select col, date_format(col, "''"), to_char(col, "''"), to_varchar(col, "''") from v;
select col, date_format(col, ''), to_char(col, ''), to_varchar(col, '') from v;

-- different datetime types
select date_format(date'2023-08-18', 'yyyy-MM-dd'), to_char(date'2023-08-18', 'yyyy-MM-dd'), to_varchar(date'2023-08-18', 'yyyy-MM-dd');
select date_format(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_char(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_varchar(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_char(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_varchar(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');
Loading