-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-17495] [SQL] Support Decimal type in Hive-hash #17056
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d16ae0f
7bc4eab
2e4c333
c0c8390
f60ffb9
65a09e9
7c0b6c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,7 +75,6 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { | |
| checkConsistencyBetweenInterpretedAndCodegen(Crc32, BinaryType) | ||
| } | ||
|
|
||
|
|
||
| def checkHiveHash(input: Any, dataType: DataType, expected: Long): Unit = { | ||
| // Note : All expected hashes need to be computed using Hive 1.2.1 | ||
| val actual = HiveHashFunction.hash(input, dataType, seed = 0) | ||
|
|
@@ -371,6 +370,51 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { | |
| new StructType().add("array", arrayOfString).add("map", mapOfString)) | ||
| .add("structOfUDT", structOfUDT)) | ||
|
|
||
| test("hive-hash for decimal") { | ||
| def checkHiveHashForDecimal( | ||
| input: String, | ||
| precision: Int, | ||
| scale: Int, | ||
| expected: Long): Unit = { | ||
| val decimalType = DataTypes.createDecimalType(precision, scale) | ||
| val decimal = { | ||
| val value = Decimal.apply(new java.math.BigDecimal(input)) | ||
| if (value.changePrecision(precision, scale)) value else null | ||
| } | ||
|
|
||
| checkHiveHash(decimal, decimalType, expected) | ||
| } | ||
|
|
||
| checkHiveHashForDecimal("18", 38, 0, 558) | ||
|
||
| checkHiveHashForDecimal("-18", 38, 0, -558) | ||
| checkHiveHashForDecimal("-18", 38, 12, -558) | ||
| checkHiveHashForDecimal("18446744073709001000", 38, 19, 0) | ||
| checkHiveHashForDecimal("-18446744073709001000", 38, 22, 0) | ||
| checkHiveHashForDecimal("-18446744073709001000", 38, 3, 17070057) | ||
| checkHiveHashForDecimal("18446744073709001000", 38, 4, -17070057) | ||
| checkHiveHashForDecimal("9223372036854775807", 38, 4, 2147482656) | ||
| checkHiveHashForDecimal("-9223372036854775807", 38, 5, -2147482656) | ||
| checkHiveHashForDecimal("00000.00000000000", 38, 34, 0) | ||
| checkHiveHashForDecimal("-00000.00000000000", 38, 11, 0) | ||
| checkHiveHashForDecimal("123456.1234567890", 38, 2, 382713974) | ||
| checkHiveHashForDecimal("123456.1234567890", 38, 20, 1871500252) | ||
| checkHiveHashForDecimal("123456.1234567890", 38, 10, 1871500252) | ||
| checkHiveHashForDecimal("-123456.1234567890", 38, 10, -1871500234) | ||
| checkHiveHashForDecimal("123456.1234567890", 38, 0, 3827136) | ||
| checkHiveHashForDecimal("-123456.1234567890", 38, 0, -3827136) | ||
| checkHiveHashForDecimal("123456.1234567890", 38, 20, 1871500252) | ||
| checkHiveHashForDecimal("-123456.1234567890", 38, 20, -1871500234) | ||
| checkHiveHashForDecimal("123456.123456789012345678901234567890", 38, 0, 3827136) | ||
| checkHiveHashForDecimal("-123456.123456789012345678901234567890", 38, 0, -3827136) | ||
| checkHiveHashForDecimal("123456.123456789012345678901234567890", 38, 10, 1871500252) | ||
| checkHiveHashForDecimal("-123456.123456789012345678901234567890", 38, 10, -1871500234) | ||
| checkHiveHashForDecimal("123456.123456789012345678901234567890", 38, 20, 236317582) | ||
| checkHiveHashForDecimal("-123456.123456789012345678901234567890", 38, 20, -236317544) | ||
| checkHiveHashForDecimal("123456.123456789012345678901234567890", 38, 30, 1728235666) | ||
| checkHiveHashForDecimal("-123456.123456789012345678901234567890", 38, 30, -1728235608) | ||
| checkHiveHashForDecimal("123456.123456789012345678901234567890", 38, 31, 1728235666) | ||
| } | ||
|
|
||
| test("SPARK-18207: Compute hash for a lot of expressions") { | ||
| val N = 1000 | ||
| val wideRow = new GenericInternalRow( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
where do we use `ctx` and `d`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Neither of them is used, but both are part of the method signature because the default implementation in the abstract class requires them:
spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
Line 321 in 3e40f6c