Skip to content

Commit 6458d84

Browse files
committed
fixed unsigned number types position in the graph. renamed to "unified numbers", added central doc template with graph
1 parent 015eb99 commit 6458d84

File tree

5 files changed

+128
-91
lines changed

5 files changed

+128
-91
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package org.jetbrains.kotlinx.dataframe.documentation
2+
3+
/**
4+
* ## Unifying Numbers
5+
*
6+
* The concept of unifying numbers is converting them to a common number type without losing information.
7+
*
8+
* The following graph shows the hierarchy of number types in Kotlin DataFrame.
9+
* The order is top-down from the most complex type to the simplest one.
10+
*
11+
* {@include [Graph]}
12+
* For each number type in the graph, it holds that a number of that type can be expressed losslessly by
13+
* a number of a more complex type (any of its parents).
14+
* This is either because the more complex type has a larger range or higher precision (in terms of bits).
15+
*/
16+
internal interface UnifyingNumbers {
17+
18+
/**
19+
* ```
20+
* BigDecimal
21+
* / \\
22+
* BigInteger \\
23+
* / \\ \\
24+
* ULong Long Double
25+
* .. | / | / | \\..
26+
* \\ | / | / |
27+
* UInt Int Float
28+
* .. | / | / \\..
29+
* \\ | / | /
30+
* UShort Short
31+
* | / |
32+
* | / |
33+
* UByte Byte
34+
* ```
35+
*/
36+
interface Graph
37+
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/NumberTypeUtils.kt

Lines changed: 58 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.impl
22

3+
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
34
import org.jetbrains.kotlinx.dataframe.impl.api.createConverter
4-
import org.jetbrains.kotlinx.dataframe.impl.commonNumberType
55
import java.math.BigDecimal
66
import java.math.BigInteger
77
import kotlin.reflect.KClass
@@ -13,70 +13,65 @@ import kotlin.reflect.typeOf
1313
* Number type graph, structured in terms of number complexity.
1414
* A number can always be expressed losslessly by a number of a more complex type (any of its parents).
1515
*
16-
* ```
17-
* BigDecimal
18-
* / \\
19-
* BigInteger |
20-
* | |
21-
* ULong |
22-
* | |
23-
* Long Double
24-
* \\ / |
25-
* UInt |
26-
* | |
27-
* Int Float
28-
* | /
29-
* UShort
30-
* |
31-
* Short
32-
* |
33-
* UByte
34-
* |
35-
* Byte
36-
* ```
16+
* {@include [UnifyingNumbers.Graph]}
3717
*
3818
* For any two numbers, we can find the nearest common ancestor in this graph
3919
* by calling [DirectedAcyclicGraph.findNearestCommonVertex].
40-
* @see getCommonNumberClass
41-
* @see commonNumberClass
20+
* @see getUnifiedNumberClass
21+
* @see unifiedNumberClass
22+
* @see UnifyingNumbers
4223
*/
43-
internal val numberTypeGraph: DirectedAcyclicGraph<KType> by lazy {
44-
dagOf(
45-
typeOf<BigDecimal>() to typeOf<BigInteger>(),
46-
typeOf<BigDecimal>() to typeOf<Double>(),
47-
typeOf<BigInteger>() to typeOf<ULong>(),
48-
typeOf<ULong>() to typeOf<Long>(),
49-
typeOf<Long>() to typeOf<UInt>(),
50-
typeOf<Double>() to typeOf<UInt>(),
51-
typeOf<Double>() to typeOf<Float>(),
52-
typeOf<UInt>() to typeOf<Int>(),
53-
typeOf<Int>() to typeOf<UShort>(),
54-
typeOf<Float>() to typeOf<UShort>(),
55-
typeOf<UShort>() to typeOf<Short>(),
56-
typeOf<Short>() to typeOf<UByte>(),
57-
typeOf<UByte>() to typeOf<Byte>(),
58-
)
24+
internal val unifiedNumberTypeGraph: DirectedAcyclicGraph<KType> by lazy {
25+
buildDag {
26+
addEdge(typeOf<BigDecimal>(), typeOf<BigInteger>())
27+
addEdge(typeOf<BigDecimal>(), typeOf<Double>())
28+
29+
addEdge(typeOf<BigInteger>(), typeOf<ULong>())
30+
addEdge(typeOf<BigInteger>(), typeOf<Long>())
31+
32+
addEdge(typeOf<ULong>(), typeOf<UInt>())
33+
34+
addEdge(typeOf<Long>(), typeOf<UInt>())
35+
addEdge(typeOf<Long>(), typeOf<Int>())
36+
37+
addEdge(typeOf<Double>(), typeOf<Int>())
38+
addEdge(typeOf<Double>(), typeOf<Float>())
39+
addEdge(typeOf<Double>(), typeOf<UInt>())
40+
41+
addEdge(typeOf<UInt>(), typeOf<UShort>())
42+
43+
addEdge(typeOf<Int>(), typeOf<UShort>())
44+
addEdge(typeOf<Int>(), typeOf<Short>())
45+
46+
addEdge(typeOf<Float>(), typeOf<Short>())
47+
addEdge(typeOf<Float>(), typeOf<UShort>())
48+
49+
addEdge(typeOf<UShort>(), typeOf<UByte>())
50+
51+
addEdge(typeOf<Short>(), typeOf<UByte>())
52+
addEdge(typeOf<Short>(), typeOf<Byte>())
53+
}
5954
}
6055

61-
/** @include [numberTypeGraph] */
62-
internal val numberClassGraph: DirectedAcyclicGraph<KClass<*>> by lazy {
63-
numberTypeGraph.map { it.classifier as KClass<*> }
56+
/** @include [unifiedNumberTypeGraph] */
57+
internal val unifiedNumberClassGraph: DirectedAcyclicGraph<KClass<*>> by lazy {
58+
unifiedNumberTypeGraph.map { it.classifier as KClass<*> }
6459
}
6560

6661
/**
6762
* Determines the nearest common numeric type, in terms of complexity, between two given classes/types.
6863
*
6964
* Unsigned types are supported too even though they are not a [Number] instance,
70-
* but unless an unsigned type is provided in the input, it will never be returned.
71-
* Meaning, given two [Number] inputs, the output will always be a [Number].
65+
* but unless two unsigned types are provided in the input, it will never be returned.
66+
* Meaning, given even a single [Number] input, the output will always be a [Number].
7267
*
7368
* @param first The first numeric type to compare. Can be null, in which case [second] is returned.
7469
* @param second The second numeric type to compare. Cannot be null.
7570
* @return The nearest common numeric type between the two input classes.
7671
* If no common class is found, [IllegalStateException] is thrown.
77-
* @see numberTypeGraph
72+
* @see UnifyingNumbers
7873
*/
79-
internal fun getCommonNumberType(first: KType?, second: KType): KType {
74+
internal fun getUnifiedNumberType(first: KType?, second: KType): KType {
8075
if (first == null) return second
8176

8277
val firstWithoutNullability = first.withNullability(false)
@@ -85,56 +80,57 @@ internal fun getCommonNumberType(first: KType?, second: KType): KType {
8580
val result = if (firstWithoutNullability == secondWithoutNullability) {
8681
firstWithoutNullability
8782
} else {
88-
numberTypeGraph.findNearestCommonVertex(firstWithoutNullability, secondWithoutNullability)
83+
unifiedNumberTypeGraph.findNearestCommonVertex(firstWithoutNullability, secondWithoutNullability)
8984
?: error("Can not find common number type for $first and $second")
9085
}
9186

9287
return if (first.isMarkedNullable || second.isMarkedNullable) result.withNullability(true) else result
9388
}
9489

95-
/** @include [getCommonNumberType] */
90+
/** @include [getUnifiedNumberType] */
9691
@Suppress("IntroduceWhenSubject")
97-
internal fun getCommonNumberClass(first: KClass<*>?, second: KClass<*>): KClass<*> =
92+
internal fun getUnifiedNumberClass(first: KClass<*>?, second: KClass<*>): KClass<*> =
9893
when {
9994
first == null -> second
10095

10196
first == second -> first
10297

103-
else -> numberClassGraph.findNearestCommonVertex(first, second)
98+
else -> unifiedNumberClassGraph.findNearestCommonVertex(first, second)
10499
?: error("Can not find common number type for $first and $second")
105100
}
106101

107102
/**
108103
* Determines the nearest common numeric type, in terms of complexity, of all types in [this].
109104
*
110105
* Unsigned types are supported too even though they are not a [Number] instance,
111-
* but unless an unsigned type is provided in the input, it will never be returned.
112-
* Meaning, given just [Number] inputs, the output will always be a [Number].
106+
* but unless the input consists solely of unsigned numbers, an unsigned type will never be returned.
107+
* Meaning, given a [Number] in the input, the output will always be a [Number].
113108
*
114109
* @return The nearest common numeric type between the input types.
115110
* If the input is empty, [Number] is returned.
116-
* @see numberTypeGraph
111+
* @see UnifyingNumbers
117112
*/
118-
internal fun Iterable<KType>.commonNumberType(): KType = fold(null as KType?, ::getCommonNumberType) ?: typeOf<Number>()
113+
internal fun Iterable<KType>.unifiedNumberType(): KType =
114+
fold(null as KType?, ::getUnifiedNumberType) ?: typeOf<Number>()
119115

120-
/** @include [commonNumberType] */
121-
internal fun Iterable<KClass<*>>.commonNumberClass(): KClass<*> =
122-
fold(null as KClass<*>?, ::getCommonNumberClass) ?: Number::class
116+
/** @include [unifiedNumberType] */
117+
internal fun Iterable<KClass<*>>.unifiedNumberClass(): KClass<*> =
118+
fold(null as KClass<*>?, ::getUnifiedNumberClass) ?: Number::class
123119

124120
/**
125121
* Converts the elements of the given iterable of numbers into a common numeric type based on complexity.
126122
* The common numeric type is determined using the provided [commonNumberType] parameter
127-
* or calculated with [Iterable.commonNumberType] from the iterable's elements if not explicitly specified.
123+
* or calculated with [Iterable.unifiedNumberType] from the iterable's elements if not explicitly specified.
128124
*
129125
* @param commonNumberType The desired common numeric type to convert the elements to.
130126
* This is determined by default using the types of the elements in the iterable.
131127
* @return A new iterable of numbers where each element is converted to the specified or inferred common number type.
132128
* @throws IllegalStateException if an element cannot be converted to the common number type.
133-
* @see Iterable.commonNumberType
129+
* @see UnifyingNumbers
134130
*/
135131
@Suppress("UNCHECKED_CAST")
136-
internal fun Iterable<Number>.convertToCommonNumberType(
137-
commonNumberType: KType = this.types().commonNumberType(),
132+
internal fun Iterable<Number>.convertToUnifiedNumberType(
133+
commonNumberType: KType = this.types().unifiedNumberType(),
138134
): Iterable<Number> {
139135
val converter = createConverter(typeOf<Number>(), commonNumberType)!! as (Number) -> Number?
140136
return map {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/NumbersAggregator.kt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
4-
import org.jetbrains.kotlinx.dataframe.impl.commonNumberType
5-
import org.jetbrains.kotlinx.dataframe.impl.convertToCommonNumberType
6-
import org.jetbrains.kotlinx.dataframe.impl.types
4+
import org.jetbrains.kotlinx.dataframe.impl.convertToUnifiedNumberType
5+
import org.jetbrains.kotlinx.dataframe.impl.unifiedNumberType
76
import kotlin.reflect.KProperty
87
import kotlin.reflect.KType
98

@@ -27,9 +26,9 @@ internal class NumbersAggregator(name: String, aggregate: (Iterable<Number>, KTy
2726
*/
2827
@Suppress("UNCHECKED_CAST")
2928
fun aggregateMixed(values: Iterable<Number>, types: Set<KType>): Number? {
30-
val commonType = types.commonNumberType()
29+
val commonType = types.unifiedNumberType()
3130
return aggregate(
32-
values = values.convertToCommonNumberType(commonType),
31+
values = values.convertToUnifiedNumberType(commonType),
3332
type = commonType,
3433
)
3534
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class SumTests {
6464
df.sum { value3 } shouldBe expected3
6565
}
6666

67-
// Issue #1068
67+
/** [Issue #1068](https://github.com/Kotlin/dataframe/issues/1068) */
6868
@Test
6969
fun `rowSum mixed number types`() {
7070
dataFrameOf("a", "b")(1, 2f)[0].rowSum().let {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@ import io.kotest.matchers.shouldBe
44
import org.jetbrains.kotlinx.dataframe.DataColumn
55
import org.jetbrains.kotlinx.dataframe.DataRow
66
import org.jetbrains.kotlinx.dataframe.api.columnOf
7+
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
78
import org.jetbrains.kotlinx.dataframe.impl.asArrayAsListOrNull
89
import org.jetbrains.kotlinx.dataframe.impl.commonParent
910
import org.jetbrains.kotlinx.dataframe.impl.commonParents
1011
import org.jetbrains.kotlinx.dataframe.impl.commonType
1112
import org.jetbrains.kotlinx.dataframe.impl.commonTypeListifyValues
1213
import org.jetbrains.kotlinx.dataframe.impl.createType
13-
import org.jetbrains.kotlinx.dataframe.impl.getCommonNumberClass
14+
import org.jetbrains.kotlinx.dataframe.impl.getUnifiedNumberClass
1415
import org.jetbrains.kotlinx.dataframe.impl.guessValueType
1516
import org.jetbrains.kotlinx.dataframe.impl.isArray
1617
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
@@ -418,43 +419,47 @@ class UtilTests {
418419
).commonTypeListifyValues() shouldBe typeOf<Collection<out Nothing?>?>()
419420
}
420421

422+
/**
423+
* See [UnifyingNumbers] for more information.
424+
* {@include [UnifyingNumbers.Graph]}
425+
*/
421426
@Test
422427
fun `common number types`() {
423428
// Same type
424-
getCommonNumberClass(Int::class, Int::class) shouldBe Int::class
425-
getCommonNumberClass(Double::class, Double::class) shouldBe Double::class
429+
getUnifiedNumberClass(Int::class, Int::class) shouldBe Int::class
430+
getUnifiedNumberClass(Double::class, Double::class) shouldBe Double::class
426431

427432
// Direct parent-child relationships
428-
getCommonNumberClass(Int::class, UShort::class) shouldBe Int::class
429-
getCommonNumberClass(UInt::class, Int::class) shouldBe UInt::class
430-
getCommonNumberClass(Long::class, UInt::class) shouldBe Long::class
431-
getCommonNumberClass(Double::class, Float::class) shouldBe Double::class
433+
getUnifiedNumberClass(Int::class, UShort::class) shouldBe Int::class
434+
getUnifiedNumberClass(Long::class, UInt::class) shouldBe Long::class
435+
getUnifiedNumberClass(Double::class, Float::class) shouldBe Double::class
436+
getUnifiedNumberClass(UShort::class, Short::class) shouldBe Int::class
437+
getUnifiedNumberClass(UByte::class, Byte::class) shouldBe Short::class
432438

433-
// Parent-child relationships for signed/unsigned types
434-
getCommonNumberClass(UShort::class, Short::class) shouldBe UShort::class
435-
getCommonNumberClass(UByte::class, Byte::class) shouldBe UByte::class
439+
getUnifiedNumberClass(UByte::class, UShort::class) shouldBe UShort::class
436440

437441
// Multi-level relationships
438-
getCommonNumberClass(Byte::class, Int::class) shouldBe Int::class
439-
getCommonNumberClass(UByte::class, Long::class) shouldBe Long::class
440-
getCommonNumberClass(Short::class, Double::class) shouldBe Double::class
442+
getUnifiedNumberClass(Byte::class, Int::class) shouldBe Int::class
443+
getUnifiedNumberClass(UByte::class, Long::class) shouldBe Long::class
444+
getUnifiedNumberClass(Short::class, Double::class) shouldBe Double::class
445+
getUnifiedNumberClass(UInt::class, Int::class) shouldBe Long::class
441446

442447
// Top-level types
443-
getCommonNumberClass(BigDecimal::class, Double::class) shouldBe BigDecimal::class
444-
getCommonNumberClass(BigInteger::class, Long::class) shouldBe BigInteger::class
445-
getCommonNumberClass(BigDecimal::class, BigInteger::class) shouldBe BigDecimal::class
448+
getUnifiedNumberClass(BigDecimal::class, Double::class) shouldBe BigDecimal::class
449+
getUnifiedNumberClass(BigInteger::class, Long::class) shouldBe BigInteger::class
450+
getUnifiedNumberClass(BigDecimal::class, BigInteger::class) shouldBe BigDecimal::class
446451

447452
// Distant relationships
448-
getCommonNumberClass(Byte::class, BigDecimal::class) shouldBe BigDecimal::class
449-
getCommonNumberClass(UByte::class, Double::class) shouldBe Double::class
453+
getUnifiedNumberClass(Byte::class, BigDecimal::class) shouldBe BigDecimal::class
454+
getUnifiedNumberClass(UByte::class, Double::class) shouldBe Double::class
450455

451456
// Complex type promotions
452-
getCommonNumberClass(Int::class, Float::class) shouldBe Double::class
453-
getCommonNumberClass(Long::class, Double::class) shouldBe BigDecimal::class
454-
getCommonNumberClass(ULong::class, Double::class) shouldBe BigDecimal::class
455-
getCommonNumberClass(BigInteger::class, Double::class) shouldBe BigDecimal::class
457+
getUnifiedNumberClass(Int::class, Float::class) shouldBe Double::class
458+
getUnifiedNumberClass(Long::class, Double::class) shouldBe BigDecimal::class
459+
getUnifiedNumberClass(ULong::class, Double::class) shouldBe BigDecimal::class
460+
getUnifiedNumberClass(BigInteger::class, Double::class) shouldBe BigDecimal::class
456461

457462
// Edge case with null
458-
getCommonNumberClass(null, Int::class) shouldBe Int::class
463+
getUnifiedNumberClass(null, Int::class) shouldBe Int::class
459464
}
460465
}

0 commit comments

Comments
 (0)