Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String}
* An expression that concatenates multiple input strings into a single string.
* If any input is null, concat returns null.
*/
@ExpressionDescription(
usage = "_FUNC_(string1, string2, ...) - Concatenates multiple strings together into a single " +
"string. If any input is null, concat returns null.",
extended = "> SELECT _FUNC_('This ', 'is ', 'a ', 'test');\n 'This is a test'")
case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
Expand Down Expand Up @@ -70,6 +74,11 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas
*
* Returns null if the separator is null. Otherwise, concat_ws skips all null values.
*/
@ExpressionDescription(
usage = "_FUNC_(separator, string1, string2, ...) - Concatenates multiple input string" +
" columns together into a single string, using a given separator (the first child). " +
"Returns null if the separator is null. Otherwise, null inputs are skipped.",
extended = "> SELECT _FUNC_(' ', 'This', 'is', 'a', 'test');\n 'This is a test'")
case class ConcatWs(children: Seq[Expression])
extends Expression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -270,6 +279,12 @@ object StringTranslate {
* The translate will happen when any character in the string matching with the character
* in the `matchingExpr`.
*/
@ExpressionDescription(
usage = "_FUNC_(string, matchingString, replaceString) - Translates any character" +
" in the string by a character in replaceString. The characters in replaceString " +
"correspond to the characters in matchingString. The translate will happen when any " +
"character in the string matches a character in the matchingString.",
extended = "> SELECT _FUNC_('translate', 'rnlt', '123');\n '1a2s3ae'")
case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -345,6 +360,9 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
/**
* A function that trims the spaces from both ends of the specified string.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Trims the spaces from both ends for the specified string.",
extended = "> SELECT _FUNC_(' SparkSql ');\n 'SparkSql'")
case class StringTrim(child: Expression)
extends UnaryExpression with String2StringExpression {

Expand All @@ -360,6 +378,9 @@ case class StringTrim(child: Expression)
/**
* A function that trims the spaces from the left end of the given string.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Trims the spaces from left end for given string.",
extended = "> SELECT _FUNC_(' SparkSql ');\n 'SparkSql '")
case class StringTrimLeft(child: Expression)
extends UnaryExpression with String2StringExpression {

Expand All @@ -375,6 +396,9 @@ case class StringTrimLeft(child: Expression)
/**
* A function that trims the spaces from the right end of the given string.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Trims the spaces from right end for given string.",
extended = "> SELECT _FUNC_(' SparkSql ');\n ' SparkSql'")
case class StringTrimRight(child: Expression)
extends UnaryExpression with String2StringExpression {

Expand All @@ -394,6 +418,11 @@ case class StringTrimRight(child: Expression)
*
* NOTE: that this is not zero based, but 1-based index. The first character in str has index 1.
*/
@ExpressionDescription(
usage = "_FUNC_(string, substr) - Returns the position of the first occurrence of substr in " +
"string. Returns null if either of the arguments is null and returns 0 if substr could " +
"not be found in string.",
extended = "> SELECT _FUNC_('This is a test', 'is');\n 3")
case class StringInstr(str: Expression, substr: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

Expand All @@ -420,6 +449,13 @@ case class StringInstr(str: Expression, substr: Expression)
* returned. If count is negative, everything to the right of the final delimiter (counting from
* the right) is returned. substring_index performs a case-sensitive match when searching for
* delim.
*/
@ExpressionDescription(
usage = "_FUNC_(str, delim, count) - Returns the substring from string str before count " +
"occurrences of the delimiter delim. If count is positive, everything to the left of the " +
"final delimiter (counting from left) is returned. If count is negative, everything to the " +
"right of the final delimiter (counting from the right) is returned. substring_index " +
"performs a case-sensitive match when searching for delim.",
extended = "> SELECT _FUNC_('www.apache.org','.',1);\n 'www'")
case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand All @@ -443,6 +479,10 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr:
* A function that returns the position of the first occurrence of substr
* in given string after position pos.
*/
@ExpressionDescription(
usage = "_FUNC_(substr, string, pos) - Returns the position of the first occurrence of substr" +
" in given string after position pos.",
extended = "> SELECT _FUNC_('is', 'This is a test', 4);\n 6")
case class StringLocate(substr: Expression, str: Expression, start: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -508,6 +548,9 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression)
/**
* Returns str, left-padded with pad to a length of len.
*/
@ExpressionDescription(
usage = "_FUNC_(string, len, pad) - Left-pad the string with pad to a length of len.",
extended = "> SELECT _FUNC_('SparkSql', 16, 'Learning');\n 'LearningSparkSql'")
case class StringLPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand All @@ -529,6 +572,9 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression)
/**
* Returns str, right-padded with pad to a length of len.
*/
@ExpressionDescription(
usage = "_FUNC_(string, len, pad) - Right-pad the string with pad to a length of len.",
extended = "> SELECT _FUNC_('SparkSql', 16, ' is Cool');\n 'SparkSql is Cool'")
case class StringRPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand All @@ -550,6 +596,10 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression)
/**
* Returns the input formatted according to printf-style format strings.
*/
@ExpressionDescription(
usage = "_FUNC_(arguments) - Formats the arguments in printf-style and returns the result " +
"as a string column.",
extended = "> SELECT _FUNC_('aa%d%s',123, 'cc');\n 'aa123cc'")
case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes {

require(children.nonEmpty, "format_string() should take at least 1 argument")
Expand Down Expand Up @@ -619,6 +669,10 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
* Returns string, with the first letter of each word in uppercase.
* Words are delimited by whitespace.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Returns a new string column by converting the first letter of " +
"each word of string to uppercase.",
extended = "> SELECT _FUNC_('This is a test');\n 'This Is A Test'")
case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[DataType] = Seq(StringType)
Expand All @@ -635,6 +689,9 @@ case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastI
/**
* Returns the string which repeats the given string value n times.
*/
@ExpressionDescription(
usage = "_FUNC_(string, n) - Repeats a string column n times, and returns it as a new string.",
extended = "> SELECT _FUNC_('SparkSql', 3);\n 'SparkSqlSparkSqlSparkSql'")
case class StringRepeat(str: Expression, times: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

Expand All @@ -657,6 +714,9 @@ case class StringRepeat(str: Expression, times: Expression)
/**
* Returns the reversed given string.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Returns the reversed given string.",
extended = "> SELECT _FUNC_('SparkSql');\n 'lqSkrapS'")
case class StringReverse(child: Expression) extends UnaryExpression with String2StringExpression {
override def convert(v: UTF8String): UTF8String = v.reverse()

Expand Down Expand Up @@ -693,6 +753,12 @@ case class StringSpace(child: Expression)
* A function that takes a substring of its first argument starting at a given position.
* Defined for String and Binary types.
*/
@ExpressionDescription(
usage = "_FUNC_(str, pos, len) - Returns the substring of str that starts at position pos " +
"and is of length len. Defined for string and binary types.",
extended = "> SELECT _FUNC_('SparkSql',1,5);\n 'Spark'")
case class Substring(str: Expression, pos: Expression, len: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -730,6 +796,9 @@ case class Substring(str: Expression, pos: Expression, len: Expression)
/**
* A function that returns the length of the given string or binary expression.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Returns the length of the given string or binary expression.",
extended = "> SELECT _FUNC_('This is a test');\n 14")
case class Length(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
Expand All @@ -750,6 +819,9 @@ case class Length(child: Expression) extends UnaryExpression with ExpectsInputTy
/**
* A function that returns the Levenshtein distance between the two given strings.
*/
@ExpressionDescription(
usage = "_FUNC_(string1, string2) - Returns the Levenshtein distance between the two given strings.",
extended = "> SELECT _FUNC_('This is a test', 'Another test');\n 9")
case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {

Expand All @@ -768,6 +840,9 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres
/**
* A function that returns the soundex code of the given string expression.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Returns soundex code of the given string expression.",
extended = "> SELECT _FUNC_('SparkSql');\n S162")
case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes {

override def dataType: DataType = StringType
Expand All @@ -784,6 +859,9 @@ case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputT
/**
* Returns the numeric value of the first character of str.
*/
@ExpressionDescription(
usage = "_FUNC_(string) - Returns the numeric value of the first character of the string.",
extended = "> SELECT _FUNC_('SparkSql');\n 83")
case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def dataType: DataType = IntegerType
Expand Down Expand Up @@ -815,6 +893,9 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp
/**
* Converts the argument from binary to a base 64 string.
*/
@ExpressionDescription(
usage = "_FUNC_(binary) - Computes the BASE64 encoding of the binary string and returns it.",
extended = "> SELECT _FUNC_(binary_column)")
case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def dataType: DataType = StringType
Expand All @@ -838,6 +919,9 @@ case class Base64(child: Expression) extends UnaryExpression with ImplicitCastIn
/**
* Converts the argument from a base 64 string to BINARY.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Converts the str from a base 64 string to BINARY.",
extended = "> SELECT _FUNC_('SparkSql');\n [4A 96 AB 91 2A A5]")
case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def dataType: DataType = BinaryType
Expand All @@ -859,6 +943,11 @@ case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCast
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
*/
@ExpressionDescription(
usage = "_FUNC_(binary, charset) - Converts the first argument from a BINARY into a string " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"binary" should be in all-caps

"using the provided character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', " +
"'UTF-16LE', 'UTF-16'). If either argument is null, the result will also be null.",
extended = "> SELECT _FUNC_(binary_column, 'UTF-8')")
case class Decode(bin: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -889,6 +978,11 @@ case class Decode(bin: Expression, charset: Expression)
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
*/
@ExpressionDescription(
usage = "_FUNC_(string, charset) - Converts the first argument into a BINARY using " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*Converts the first argument

"the provided charset (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', " +
"'UTF-16LE', 'UTF-16'). If either argument is null, the result will also be null.",
extended = "> SELECT _FUNC_('SparkSql', 'UTF-8');\n [53 70 61 72 6B 53 71 6C]")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On my Mac, the query select Encode('SparkSql', 'UTF-8'); returns the SparkSql in the spark-sql shell.

case class Encode(value: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

Expand Down Expand Up @@ -918,6 +1012,11 @@ case class Encode(value: Expression, charset: Expression)
* and returns the result as a string. If D is 0, the result has no decimal point or
* fractional part.
*/
@ExpressionDescription(
usage = "_FUNC_(number, decimal) - Formats the number to a format like '#,###,###.##', " +
"rounded to decimal places, and returns the result as a string. If decimal is 0, the " +
"result has no decimal point or fractional part.",
extended = "> SELECT _FUNC_(1234567.890, 2);\n 1,234,567.89")
case class FormatNumber(x: Expression, d: Expression)
extends BinaryExpression with ExpectsInputTypes {

Expand Down