Skip to content

Commit 2c996d0

Browse files
authored
Merge pull request #550 from Kopilov/NullVector
Reading Arrow NullVector
2 parents 0d4c052 + 856c4a5 commit 2c996d0

File tree

6 files changed

+29
-0
lines changed

6 files changed

+29
-0
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import org.apache.arrow.vector.Float8Vector
1313
import org.apache.arrow.vector.IntVector
1414
import org.apache.arrow.vector.LargeVarBinaryVector
1515
import org.apache.arrow.vector.LargeVarCharVector
16+
import org.apache.arrow.vector.NullVector
1617
import org.apache.arrow.vector.SmallIntVector
1718
import org.apache.arrow.vector.TimeMicroVector
1819
import org.apache.arrow.vector.TimeMilliVector
@@ -172,6 +173,10 @@ private fun StructVector.values(range: IntRange): List<Map<String, Any?>?> = ran
172173
getObject(it)
173174
}
174175

176+
/**
 * Collects the entries of this [NullVector] at every index in [range].
 *
 * Each entry is fetched with `getObject` and cast to `Nothing?`, so the cast
 * can only succeed when the fetched entry is `null`.
 *
 * @param range row indices to read, in iteration order.
 * @return one list element per index in [range].
 */
private fun NullVector.values(range: IntRange): List<Nothing?> {
    val cells = mutableListOf<Nothing?>()
    for (rowIndex in range) {
        cells.add(getObject(rowIndex) as Nothing?)
    }
    return cells
}
179+
175180
private fun VarCharVector.values(range: IntRange): List<String?> = range.map {
176181
if (isNull(it)) {
177182
null
@@ -204,6 +209,12 @@ private fun LargeVarCharVector.values(range: IntRange): List<String?> = range.ma
204209
}
205210
}
206211

212+
/**
 * Builds the [KType] used for a column that contains nothing but nulls.
 *
 * The type is taken from the first type argument of `List<Nothing?>` or
 * `List<Nothing>`, rather than being requested from `typeOf` directly.
 *
 * @param nullable whether the returned type should be `Nothing?` (`true`) or `Nothing` (`false`).
 * @return the type argument extracted from the chosen list type.
 */
internal fun nothingType(nullable: Boolean): KType {
    val carrierType = when {
        nullable -> typeOf<List<Nothing?>>()
        else -> typeOf<List<Nothing>>()
    }
    // A List type with an explicit (non-star) argument always carries a type for it.
    return carrierType.arguments.first().type!!
}
217+
207218
private inline fun <reified T> List<T?>.withTypeNullable(
208219
expectedNulls: Boolean,
209220
nullabilityOptions: NullabilityOptions,
@@ -212,6 +223,15 @@ private inline fun <reified T> List<T?>.withTypeNullable(
212223
return this to typeOf<T>().withNullability(nullable)
213224
}
214225

226+
/**
 * Pairs this list of null-only values with the [KType] describing its elements.
 *
 * The nullability flag is whatever `nullabilityOptions.applyNullability` returns
 * for this list and [expectedNulls]; that flag is then passed to [nothingType].
 *
 * @param expectedNulls nullability the caller expects for the values (forwarded unchanged).
 * @param nullabilityOptions strategy object that decides the final nullability.
 * @return this same list together with the resulting element type.
 */
@JvmName("withTypeNullableNothingList")
private fun List<Nothing?>.withTypeNullable(
    expectedNulls: Boolean,
    nullabilityOptions: NullabilityOptions,
): Pair<List<Nothing?>, KType> =
    Pair(this, nothingType(nullabilityOptions.applyNullability(this, expectedNulls)))
234+
215235
private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol {
216236
try {
217237
val range = 0 until root.rowCount
@@ -245,6 +265,7 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
245265
is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
246266
is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
247267
is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
268+
is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
248269
else -> {
249270
throw NotImplementedError("reading from ${vector.javaClass.canonicalName} is not implemented")
250271
}

dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,12 @@ internal fun assertEstimations(exampleFrame: AnyFrame, expectedNullable: Boolean
158158
timeNanoCol.forEachIndexed { i, element ->
159159
assertValueOrNull(iBatch(i), element, LocalTime.ofNanoOfDay(iBatch(i).toLong()))
160160
}
161+
162+
exampleFrame.getColumnOrNull("nulls")?.let { nullCol ->
163+
nullCol.type() shouldBe nothingType(hasNulls)
164+
assert(hasNulls)
165+
nullCol.values().forEach {
166+
assert(it == null)
167+
}
168+
}
161169
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)