Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ object FunctionRegistry {
expression[Chr]("char"),
expression[Chr]("chr"),
expression[Base64]("base64"),
expression[BitLength]("bit_length"),
expression[Length]("char_length"),
expression[Concat]("concat"),
expression[ConcatWs]("concat_ws"),
expression[Decode]("decode"),
Expand All @@ -321,6 +323,7 @@ object FunctionRegistry {
expression[Levenshtein]("levenshtein"),
expression[Like]("like"),
expression[Lower]("lower"),
expression[OctetLength]("octet_length"),
expression[StringLocate]("locate"),
expression[StringLPad]("lpad"),
expression[StringTrimLeft]("ltrim"),
Expand Down
61 changes: 59 additions & 2 deletions ...catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -1199,15 +1199,18 @@ case class Substring(str: Expression, pos: Expression, len: Expression)
}

/**
* A function that return the length of the given string or binary expression.
* A function that returns the char length of the given string expression or
* number of bytes of the given binary expression.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the length of `expr` or number of bytes in binary data.",
usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data.",
extended = """
Examples:
> SELECT _FUNC_('Spark SQL');
9
""")
// scalastyle:on line.size.limit
case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
Expand All @@ -1225,6 +1228,60 @@ case class Length(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}

/**
 * A function that returns the size, in bits, of the given string or binary expression:
 * the byte count of the value multiplied by 8. The result type is [[IntegerType]].
 */
@ExpressionDescription(
  usage = "_FUNC_(expr) - Returns the bit length of `expr` or number of bits in binary data.",
  extended = """
Examples:
> SELECT _FUNC_('Spark SQL');
72
""")
case class BitLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
  override def dataType: DataType = IntegerType
  override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))

  /**
   * Interpreted evaluation: determine the byte count for the child's data type first,
   * then scale it to bits.
   */
  protected override def nullSafeEval(value: Any): Any = {
    val byteCount = child.dataType match {
      // numBytes is the string's byte count, not its character count.
      case StringType => value.asInstanceOf[UTF8String].numBytes
      case BinaryType => value.asInstanceOf[Array[Byte]].length
    }
    byteCount * 8
  }

  /**
   * Code generation: choose the Java snippet builder matching the child's data type and
   * hand it to `defineCodeGen`.
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val bitLengthOf: String => String = child.dataType match {
      case StringType => (input: String) => s"($input).numBytes() * 8"
      case BinaryType => (input: String) => s"($input).length * 8"
    }
    defineCodeGen(ctx, ev, bitLengthOf)
  }
}

/**
 * A function that returns the size, in bytes (octets), of the given string or binary
 * expression. The result type is [[IntegerType]].
 */
@ExpressionDescription(
  usage = "_FUNC_(expr) - Returns the byte length of `expr` or number of bytes in binary data.",
  extended = """
Examples:
> SELECT _FUNC_('Spark SQL');
9
""")
case class OctetLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
  override def dataType: DataType = IntegerType
  override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))

  /**
   * Interpreted evaluation: cast the value according to the child's data type and
   * report how many bytes it holds.
   */
  protected override def nullSafeEval(value: Any): Any = {
    child.dataType match {
      case StringType =>
        // numBytes is the string's byte count, not its character count.
        val text = value.asInstanceOf[UTF8String]
        text.numBytes
      case BinaryType =>
        val raw = value.asInstanceOf[Array[Byte]]
        raw.length
    }
  }

  /**
   * Code generation: choose the Java snippet builder matching the child's data type and
   * hand it to `defineCodeGen`.
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val byteLengthOf: String => String = child.dataType match {
      case StringType => (input: String) => s"($input).numBytes()"
      case BinaryType => (input: String) => s"($input).length"
    }
    defineCodeGen(ctx, ev, byteLengthOf)
  }
}

/**
* A function that returns the Levenshtein distance between the two given strings.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -558,20 +558,40 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// scalastyle:off
// non ascii characters are not allowed in the source code, so we disable the scalastyle.
checkEvaluation(Length(Literal("a花花c")), 4, create_row(string))
checkEvaluation(OctetLength(Literal("a花花c")), 8, create_row(string))
checkEvaluation(BitLength(Literal("a花花c")), 8 * 8, create_row(string))
// scalastyle:on
checkEvaluation(Length(Literal(bytes)), 5, create_row(Array.empty[Byte]))
checkEvaluation(OctetLength(Literal(bytes)), 5, create_row(Array.empty[Byte]))
checkEvaluation(BitLength(Literal(bytes)), 5 * 8, create_row(Array.empty[Byte]))

checkEvaluation(Length(a), 5, create_row(string))
checkEvaluation(OctetLength(a), 5, create_row(string))
checkEvaluation(BitLength(a), 5 * 8, create_row(string))
checkEvaluation(Length(b), 5, create_row(bytes))
checkEvaluation(OctetLength(b), 5, create_row(bytes))
checkEvaluation(BitLength(b), 5 * 8, create_row(bytes))

checkEvaluation(Length(a), 0, create_row(""))
checkEvaluation(OctetLength(a), 0, create_row(""))
checkEvaluation(BitLength(a), 0, create_row(""))
checkEvaluation(Length(b), 0, create_row(Array.empty[Byte]))
checkEvaluation(OctetLength(b), 0, create_row(Array.empty[Byte]))
checkEvaluation(BitLength(b), 0, create_row(Array.empty[Byte]))

checkEvaluation(Length(a), null, create_row(null))
checkEvaluation(OctetLength(a), null, create_row(null))
checkEvaluation(BitLength(a), null, create_row(null))
checkEvaluation(Length(b), null, create_row(null))
checkEvaluation(OctetLength(b), null, create_row(null))
checkEvaluation(BitLength(b), null, create_row(null))

checkEvaluation(Length(Literal.create(null, StringType)), null, create_row(string))
checkEvaluation(OctetLength(Literal.create(null, StringType)), null, create_row(string))
checkEvaluation(BitLength(Literal.create(null, StringType)), null, create_row(string))
checkEvaluation(Length(Literal.create(null, BinaryType)), null, create_row(bytes))
checkEvaluation(OctetLength(Literal.create(null, BinaryType)), null, create_row(bytes))
checkEvaluation(BitLength(Literal.create(null, BinaryType)), null, create_row(bytes))
}

test("format_number / FormatNumber") {
Expand Down
5 changes: 5 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/operators.sql
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,8 @@ select 1 > 0.00001;

-- mod
select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null);

-- length
select BIT_LENGTH('abc');
select CHAR_LENGTH('abc');
select OCTET_LENGTH('abc');
26 changes: 25 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/operators.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 51
-- Number of queries: 54


-- !query 0
Expand Down Expand Up @@ -420,3 +420,27 @@ select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, nu
struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double>
-- !query 50 output
1 NULL 0 NULL NULL NULL


-- !query 51
select BIT_LENGTH('abc')
-- !query 51 schema
struct<bitlength(abc):int>
-- !query 51 output
24


-- !query 52
select CHAR_LENGTH('abc')
-- !query 52 schema
struct<length(abc):int>
-- !query 52 output
3


-- !query 53
select OCTET_LENGTH('abc')
-- !query 53 schema
struct<octetlength(abc):int>
-- !query 53 output
3