Skip to content

Commit 9e12d86

Browse files
authored
Merge pull request #574 from ermolenkodev/KTNB-272
Fix for #573: Change serialization format for rendering in IntelliJ IDEA
2 parents 8ac0ace + 7a4ad5c commit 9e12d86

File tree

16 files changed

+2128
-1404
lines changed

16 files changed

+2128
-1404
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt

Lines changed: 606 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.io
2+
3+
import com.beust.klaxon.JsonArray
4+
import com.beust.klaxon.JsonObject
5+
import com.beust.klaxon.KlaxonJson
6+
import org.jetbrains.kotlinx.dataframe.AnyCol
7+
import org.jetbrains.kotlinx.dataframe.AnyFrame
8+
import org.jetbrains.kotlinx.dataframe.ColumnsContainer
9+
import org.jetbrains.kotlinx.dataframe.DataColumn
10+
import org.jetbrains.kotlinx.dataframe.api.indices
11+
import org.jetbrains.kotlinx.dataframe.api.isList
12+
import org.jetbrains.kotlinx.dataframe.api.name
13+
import org.jetbrains.kotlinx.dataframe.api.rows
14+
import org.jetbrains.kotlinx.dataframe.api.take
15+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
16+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
17+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
18+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
19+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA
20+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND
21+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
22+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
23+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
24+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
25+
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
26+
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName
27+
import org.jetbrains.kotlinx.dataframe.io.valueColumnName
28+
import org.jetbrains.kotlinx.dataframe.ncol
29+
import org.jetbrains.kotlinx.dataframe.nrow
30+
import org.jetbrains.kotlinx.dataframe.typeClass
31+
32+
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name.
 *
 * Column groups are encoded recursively as nested objects, frame columns as arrays (via [encodeFrame]),
 * and every other column through [encodeValue].
 *
 * @param frame container whose columns supply the row's cells.
 * @param index position of the row to encode.
 * @return the JSON object for the row, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded = when (column) {
            is ColumnGroup<*> -> encodeRow(column, index)
            is FrameColumn<*> -> encodeFrame(column[index])
            else -> encodeValue(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
43+
44+
/** JSON property names used by the metadata-carrying serialization format produced in this file. */
internal object SerializationKeys {
    // Keys of the top-level envelope object.
    const val VERSION = "\$version"
    const val METADATA = "metadata"
    const val KOTLIN_DATAFRAME = "kotlin_dataframe"

    // Keys that appear inside a "metadata" object.
    const val COLUMNS = "columns"
    const val NROW = "nrow"
    const val NCOL = "ncol"
    const val KIND = "kind"

    // Key holding the payload of a wrapped group/frame cell.
    const val DATA = "data"
}
54+
55+
/** Version string written under the [SerializationKeys.VERSION] key of the serialized envelope. */
internal const val SERIALIZATION_VERSION: String = "2.0.0"
56+
57+
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name, wrapping structured cells
 * together with metadata describing them.
 *
 * - Column-group cells become `{ "data": <nested row>, "metadata": { "kind": ... } }`.
 * - Frame-column cells become `{ "data": <rows>, "metadata": { "kind", "ncol", "nrow" } }`; the reported
 *   `ncol`/`nrow` are those of the full nested frame, even when only the first [rowLimit] rows are emitted.
 * - All other cells are encoded through [encodeValue] without a wrapper.
 *
 * @param frame container whose columns supply the row's cells.
 * @param index position of the row to encode.
 * @param rowLimit maximum number of rows emitted for nested frames, or `null` for no limit.
 * @return the JSON object for the row, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRowWithMetadata(
    frame: ColumnsContainer<*>,
    index: Int,
    rowLimit: Int? = null
): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded: Any? = when (column) {
            is ColumnGroup<*> -> {
                val nestedRow = encodeRowWithMetadata(column, index, rowLimit)
                val groupMetadata = obj(KIND to ColumnKind.Group.toString())
                obj(DATA to nestedRow, METADATA to groupMetadata)
            }

            is FrameColumn<*> -> {
                val nestedFrame = column[index]
                val payload = if (rowLimit != null) {
                    // Truncate this frame and propagate the same limit to deeper levels.
                    encodeFrameWithMetadata(nestedFrame.take(rowLimit), rowLimit)
                } else {
                    encodeFrameWithMetadata(nestedFrame)
                }
                val frameMetadata = obj(
                    KIND to ColumnKind.Frame.toString(),
                    NCOL to nestedFrame.ncol,
                    NROW to nestedFrame.nrow,
                )
                obj(DATA to payload, METADATA to frameMetadata)
            }

            else -> encodeValue(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
88+
89+
/** Element classes whose values are passed to the JSON encoder as-is instead of being converted to strings. */
private val valueTypes = setOf(
    Boolean::class,
    Double::class,
    Int::class,
    Float::class,
    Long::class,
    Short::class,
    Byte::class,
)
91+
92+
/**
 * Encodes the single cell at [index] of [col] into a value the JSON DSL can render.
 *
 * - List columns produce a JSON array; a `null` cell produces an empty array.
 * - Columns whose element class is in [valueTypes] produce the raw boolean/number, except that non-finite
 *   floating-point values (`NaN`, `Infinity`, `-Infinity`) are emitted as their string form, because JSON
 *   has no literal for them and a bare token would make the output unparseable.
 * - Every other column produces `toString()` of the cell, or `null` for a `null` cell.
 *
 * @param col column to read from.
 * @param index position of the cell inside [col].
 * @return a JSON array, a primitive, a string, or `null`.
 */
internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? = when {
    col.isList() -> col[index]?.let { array(it as List<*>) } ?: array()

    col.typeClass in valueTypes -> {
        val cell = col[index]
        // `!isFinite()` covers NaN as well as both infinities.
        val isNonFinite = (cell is Double && !cell.isFinite()) || (cell is Float && !cell.isFinite())
        if (isNonFinite) cell.toString() else cell
    }

    else -> col[index]?.toString()
}
103+
104+
/**
 * Encodes every row of [frame] into a JSON array, using the metadata-carrying row format.
 *
 * For each row the first applicable rule wins:
 * 1. a non-null cell of the unnamed value column (see [extractValueColumn]) is emitted directly;
 * 2. otherwise, when the unnamed array column (see [extractArrayColumn]) is a frame column and its cell is
 *    non-null, that nested frame is encoded recursively;
 * 3. otherwise the whole row is encoded with [encodeRowWithMetadata].
 *
 * @param frame frame to encode; it is not truncated here.
 * @param rowLimit limit forwarded to nested encoders, or `null` for no limit.
 * @return JSON array with one element per row of [frame].
 */
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
    val unnamedValues = frame.extractValueColumn()
    // Only an array column of kind Frame takes part in rule 2; any other kind falls through to rule 3.
    val unnamedFrames = frame.extractArrayColumn()?.takeIf { it.kind() == ColumnKind.Frame }

    val encodedRows = frame.indices().map { rowIndex ->
        unnamedValues?.get(rowIndex)
            ?: unnamedFrames?.get(rowIndex)?.let { encodeFrameWithMetadata(it as AnyFrame, rowLimit) }
            ?: encodeRowWithMetadata(frame, rowIndex, rowLimit)
    }

    return array(encodedRows)
}
122+
123+
/**
 * Finds the column that holds unnamed primitive values, if this frame has one.
 *
 * The candidate is the column with the greatest name among those whose name starts with [valueColumnName].
 * It is accepted only when it is a value column and, in every row where it is non-null, all other columns
 * are null.
 *
 * @return the unnamed value column, or `null` when the frame has exactly one column, has no candidate, or
 *   the candidate fails the checks above.
 */
internal fun AnyFrame.extractValueColumn(): DataColumn<*>? {
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val candidate = allColumns
        .filter { it.name.startsWith(valueColumnName) }
        .maxByOrNull { it.name }
        ?: return null

    if (candidate.kind() != ColumnKind.Value) return null

    // The candidate may be non-null only in rows where every other column is null.
    val isValidValueColumn = rows().all { row ->
        candidate[row] == null ||
            allColumns.all { other -> other.name == candidate.name || other[row] == null }
    }
    return candidate.takeIf { isValidValueColumn }
}
146+
147+
/**
 * Whether this frame is eligible for unnamed value/array column detection.
 *
 * With exactly one column the "all other columns are null" validity check is trivially true. However, a frame
 * such as `dataFrameOf("value")(1, 2, 3)` was named by the user and must not be treated as an unnamed
 * column, so single-column frames are excluded.
 */
internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean
    get() = columnsCount() != 1
152+
153+
/**
 * Finds the column that holds unnamed arrays, if this frame has one.
 *
 * The candidate is the column with the greatest name among those whose name starts with [arrayColumnName].
 * It is accepted only when it is not a column group and, in every row where it is non-null, all other
 * columns are null.
 *
 * @return the unnamed array column, or `null` when the frame has exactly one column, has no candidate, or
 *   the candidate fails the checks above.
 */
internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? {
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val candidate = allColumns
        .filter { it.name.startsWith(arrayColumnName) }
        .maxByOrNull { it.name }
        ?: return null

    if (candidate.kind() == ColumnKind.Group) return null

    // The candidate may be non-null only in rows where every other column is null.
    val isValidArrayColumn = rows().all { row ->
        candidate[row] == null ||
            allColumns.all { other -> other.name == candidate.name || other[row] == null }
    }
    return candidate.takeIf { isValidArrayColumn }
}
175+
176+
/**
 * Encodes every row of [frame] into a JSON array, without metadata wrappers.
 *
 * For each row the first applicable rule wins:
 * 1. a non-null cell of the unnamed value column (see [extractValueColumn]) is emitted directly;
 * 2. otherwise, when the unnamed array column (see [extractArrayColumn]) is a frame column and its cell is
 *    non-null, that nested frame is encoded recursively;
 * 3. otherwise the whole row is encoded with [encodeRow].
 *
 * @param frame frame to encode.
 * @return JSON array with one element per row of [frame].
 */
internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
    val unnamedValues = frame.extractValueColumn()
    // Only an array column of kind Frame takes part in rule 2; any other kind falls through to rule 3.
    val unnamedFrames = frame.extractArrayColumn()?.takeIf { it.kind() == ColumnKind.Frame }

    val encodedRows = frame.indices().map { rowIndex ->
        unnamedValues?.get(rowIndex)
            ?: unnamedFrames?.get(rowIndex)?.let { encodeFrame(it as AnyFrame) }
            ?: encodeRow(frame, rowIndex)
    }

    return array(encodedRows)
}
194+
195+
/**
 * Builds the top-level serialized envelope for [frame].
 *
 * The envelope holds the format version, a metadata object (column names plus the row and column counts of
 * the full, untruncated [frame]) and the encoded rows of the first [rowLimit] rows.
 *
 * @param frame frame to serialize.
 * @param rowLimit number of top-level rows to emit, passed to `take`.
 * @param nestedRowLimit row limit forwarded to nested frames, or `null` for no limit.
 * @return the envelope as a JSON object.
 */
internal fun KlaxonJson.encodeDataFrameWithMetadata(
    frame: AnyFrame,
    rowLimit: Int,
    nestedRowLimit: Int? = null,
): JsonObject {
    // Metadata describes the whole frame, not only the truncated part that is emitted below.
    val frameMetadata = obj(
        COLUMNS to frame.columnNames(),
        NROW to frame.rowsCount(),
        NCOL to frame.columnsCount(),
    )
    val encodedRows = encodeFrameWithMetadata(frame.take(rowLimit), rowLimit = nestedRowLimit)

    return obj(
        VERSION to SERIALIZATION_VERSION,
        METADATA to frameMetadata,
        KOTLIN_DATAFRAME to encodedRows,
    )
}

0 commit comments

Comments
 (0)