Kotlin
diff --git a/‎examples/README.md‎
Lines changed: 10 additions & 0 deletions b/‎examples/README.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎examples/idea-examples/unsupported-data-sources/build.gradle.kts‎
Lines changed: 18 additions & 3 deletions b/‎examples/idea-examples/unsupported-data-sources/build.gradle.kts‎
Lines changed: 18 additions & 3 deletions
diff --git a/‎examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark/compatibilityLayer.kt‎
Lines changed: 5 additions & 310 deletions b/‎examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark/compatibilityLayer.kt‎
Lines changed: 5 additions & 310 deletions
diff --git a/‎examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark/untypedDataset.kt‎
Lines changed: 4 additions & 1 deletion b/‎examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark/untypedDataset.kt‎
Lines changed: 4 additions & 1 deletion
@@ -7,6 +7,16 @@
 * [json](idea-examples/json) Using OpenAPI support in DataFrame's Gradle and KSP plugins to access data from [API guru](https://apis.guru/) in a type-safe manner
 * [imdb sql database](https://github.com/zaleslaw/KotlinDataFrame-SQL-Examples) This project prominently showcases how to convert data from an SQL table to a Kotlin DataFrame 
 and how to transform the result of an SQL query into a DataFrame.
+* [unsupported-data-sources](idea-examples/unsupported-data-sources) Showcases how to use DataFrame with
+  (currently) unsupported data libraries such as [Spark](https://spark.apache.org/) and [Exposed](https://github.com/JetBrains/Exposed).
+  The examples show how to convert between a Kotlin DataFrame and each library's own table type, in both directions.
+  * **JetBrains Exposed**: See the [exposed folder](./idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/exposed)
+    for an example of using Kotlin DataFrame with [Exposed](https://github.com/JetBrains/Exposed).
+  * **Apache Spark**: See the [spark folder](./idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/spark)
+    for an example of using Kotlin DataFrame with [Spark](https://spark.apache.org/).
+  * **Spark (with Kotlin Spark API)**: See the [kotlinSpark folder](./idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark)
+    for an example of using Kotlin DataFrame with the [Kotlin Spark API](https://github.com/JetBrains/kotlin-spark-api).
+
 
 ### Notebook examples
 
 
@@ -1,6 +1,3 @@
-import org.jetbrains.kotlin.gradle.dsl.JvmTarget
-import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
-
 plugins {
     application
     kotlin("jvm")
@@ -52,3 +49,21 @@ val runKotlinSparkUntypedDataset by tasks.registering(JavaExec::class) {
     javaLauncher = javaToolchains.launcherFor { languageVersion = JavaLanguageVersion.of(11) }
     mainClass = "org.jetbrains.kotlinx.dataframe.examples.kotlinSpark.UntypedDatasetKt"
 }
+
+/**
+ * Runs the spark/typedDataset example with Java 11.
+ *
+ * Registers a [JavaExec] task that launches
+ * `org.jetbrains.kotlinx.dataframe.examples.spark.TypedDatasetKt`
+ * on the runtime classpath of the `main` source set, using a Java 11 launcher
+ * resolved through Gradle's Java toolchain support instead of the JVM running the build.
+ */
+val runSparkTypedDataset by tasks.registering(JavaExec::class) {
+    classpath = sourceSets["main"].runtimeClasspath
+    javaLauncher = javaToolchains.launcherFor { languageVersion = JavaLanguageVersion.of(11) }
+    mainClass = "org.jetbrains.kotlinx.dataframe.examples.spark.TypedDatasetKt"
+}
+
+/**
+ * Runs the spark/untypedDataset example with Java 11.
+ *
+ * Registers a [JavaExec] task that launches
+ * `org.jetbrains.kotlinx.dataframe.examples.spark.UntypedDatasetKt`
+ * on the runtime classpath of the `main` source set, using a Java 11 launcher
+ * resolved through Gradle's Java toolchain support instead of the JVM running the build.
+ */
+val runSparkUntypedDataset by tasks.registering(JavaExec::class) {
+    classpath = sourceSets["main"].runtimeClasspath
+    javaLauncher = javaToolchains.launcherFor { languageVersion = JavaLanguageVersion.of(11) }
+    mainClass = "org.jetbrains.kotlinx.dataframe.examples.spark.UntypedDatasetKt"
+}
@@ -1,313 +1,8 @@
-package org.jetbrains.kotlinx.dataframe.examples.kotlinSpark
-
-import org.apache.spark.api.java.JavaSparkContext
-import org.apache.spark.sql.Dataset
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.RowFactory
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.types.ArrayType
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.DataTypes
-import org.apache.spark.sql.types.Decimal
-import org.apache.spark.sql.types.DecimalType
-import org.apache.spark.sql.types.MapType
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.unsafe.types.CalendarInterval
-import org.jetbrains.kotlinx.dataframe.AnyFrame
-import org.jetbrains.kotlinx.dataframe.DataColumn
-import org.jetbrains.kotlinx.dataframe.DataFrame
-import org.jetbrains.kotlinx.dataframe.DataRow
-import org.jetbrains.kotlinx.dataframe.api.rows
-import org.jetbrains.kotlinx.dataframe.api.schema
-import org.jetbrains.kotlinx.dataframe.api.toDataFrame
-import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
-import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
-import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
-import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
-import org.jetbrains.kotlinx.spark.api.toRDD
-import java.math.BigDecimal
-import java.math.BigInteger
-import java.sql.Date
-import java.sql.Timestamp
-import java.time.Instant
-import java.time.LocalDate
-import kotlin.reflect.KType
-import kotlin.reflect.KTypeProjection
-import kotlin.reflect.full.createType
-import kotlin.reflect.full.isSubtypeOf
-import kotlin.reflect.full.withNullability
-import kotlin.reflect.typeOf
-
-// region Spark to DataFrame
-
-/**
- * Converts an untyped Spark [Dataset] (Dataframe) to a Kotlin [DataFrame].
- * [StructTypes][StructType] are converted to [ColumnGroups][ColumnGroup].
- *
- * DataFrame supports type inference to do the conversion automatically.
- * This is usually fine for smaller data sets, but when working with larger datasets a type map might be a good idea.
- * See [convertToDataFrame] for more information.
- */
-fun Dataset<Row>.convertToDataFrameByInference(
-    schema: StructType = schema(),
-    prefix: List<String> = emptyList(),
-): AnyFrame {
-    val columns = schema.fields().map { field ->
-        val name = field.name()
-        when (val dataType = field.dataType()) {
-            is StructType ->
-                DataColumn.createColumnGroup(
-                    name = name,
-                    df = convertToDataFrameByInference(dataType, prefix + name),
-                )
-
-            else ->
-                DataColumn.createByInference(
-                    name = name,
-                    values = select((prefix + name).joinToString("."))
-                        .collectAsList()
-                        .map { it[0] },
-                    suggestedType = TypeSuggestion.Infer,
-                    nullable = field.nullable(),
-                )
-        }
-    }
-    return columns.toDataFrame()
-}
-
-/**
- * Converts an untyped Spark [Dataset] (Dataframe) to a Kotlin [DataFrame].
- * [StructTypes][StructType] are converted to [ColumnGroups][ColumnGroup].
- *
- * This version uses a [type-map][DataType.convertToDataFrame] to convert the schemas with a fallback to inference.
- * For smaller data sets, inference is usually fine too.
- * See [convertToDataFrameByInference] for more information.
- */
-fun Dataset<Row>.convertToDataFrame(schema: StructType = schema(), prefix: List<String> = emptyList()): AnyFrame {
-    val columns = schema.fields().map { field ->
-        val name = field.name()
-        when (val dataType = field.dataType()) {
-            is StructType ->
-                DataColumn.createColumnGroup(
-                    name = name,
-                    df = convertToDataFrame(dataType, prefix + name),
-                )
-
-            else ->
-                DataColumn.createByInference(
-                    name = name,
-                    values = select((prefix + name).joinToString("."))
-                        .collectAsList()
-                        .map { it[0] },
-                    suggestedType =
-                        dataType.convertToDataFrame()
-                            ?.let(TypeSuggestion::Use)
-                            ?: TypeSuggestion.Infer, // fallback to inference if needed
-                    nullable = field.nullable(),
-                )
-        }
-    }
-    return columns.toDataFrame()
-}
-
-/**
- * Returns the corresponding Kotlin type for a given Spark DataType.
- *
- * This list may be incomplete, but it can at least give you a good start.
- *
- * @return The KType that corresponds to the Spark DataType, or null if no matching KType is found.
- */
-fun DataType.convertToDataFrame(): KType? =
-    when {
-        this == DataTypes.ByteType -> typeOf<Byte>()
-
-        this == DataTypes.ShortType -> typeOf<Short>()
-
-        this == DataTypes.IntegerType -> typeOf<Int>()
-
-        this == DataTypes.LongType -> typeOf<Long>()
-
-        this == DataTypes.BooleanType -> typeOf<Boolean>()
-
-        this == DataTypes.FloatType -> typeOf<Float>()
-
-        this == DataTypes.DoubleType -> typeOf<Double>()
+@file:Suppress("ktlint:standard:no-empty-file")
 
-        this == DataTypes.StringType -> typeOf<String>()
-
-        this == DataTypes.DateType -> typeOf<Date>()
-
-        this == DataTypes.TimestampType -> typeOf<Timestamp>()
-
-        this is DecimalType -> typeOf<Decimal>()
-
-        this == DataTypes.CalendarIntervalType -> typeOf<CalendarInterval>()
-
-        this == DataTypes.NullType -> nullableNothingType
-
-        this == DataTypes.BinaryType -> typeOf<ByteArray>()
-
-        this is ArrayType -> {
-            when (elementType()) {
-                DataTypes.ShortType -> typeOf<ShortArray>()
-                DataTypes.IntegerType -> typeOf<IntArray>()
-                DataTypes.LongType -> typeOf<LongArray>()
-                DataTypes.FloatType -> typeOf<FloatArray>()
-                DataTypes.DoubleType -> typeOf<DoubleArray>()
-                DataTypes.BooleanType -> typeOf<BooleanArray>()
-                else -> null
-            }
-        }
-
-        this is MapType -> {
-            val key = keyType().convertToDataFrame() ?: return null
-            val value = valueType().convertToDataFrame() ?: return null
-            Map::class.createType(
-                listOf(
-                    KTypeProjection.invariant(key),
-                    KTypeProjection.invariant(value.withNullability(valueContainsNull())),
-                ),
-            )
-        }
-
-        else -> null
-    }
-
-// endregion
-
-// region DataFrame to Spark
-
-/**
- * Converts the DataFrame to a Spark Dataset of Rows using the provided SparkSession and JavaSparkContext.
- *
- * Spark needs both the data and the schema to be converted to create a correct [Dataset].
- *
- * @param spark The SparkSession object to use for creating the DataFrame.
- * @param sc The JavaSparkContext object to use for converting the DataFrame to RDD.
- * @return A Dataset of Rows representing the converted DataFrame.
- */
-fun DataFrame<*>.convertToSpark(spark: SparkSession, sc: JavaSparkContext): Dataset<Row> {
-    val rows = sc.toRDD(rows().map { it.convertToSpark() })
-    return spark.createDataFrame(rows, schema().convertToSpark())
-}
-
-/**
- * Converts a DataRow to a Spark Row object.
- *
- * @return The converted Spark Row.
- */
-fun DataRow<*>.convertToSpark(): Row =
-    RowFactory.create(
-        *values().map {
-            when (it) {
-                is DataRow<*> -> it.convertToSpark()
-                else -> it
-            }
-        }.toTypedArray(),
-    )
-
-/**
- * Converts a DataFrameSchema to a Spark StructType.
- *
- * @return The converted Spark StructType.
- */
-fun DataFrameSchema.convertToSpark(): StructType =
-    DataTypes.createStructType(
-        columns.map { (name, schema) ->
-            DataTypes.createStructField(name, schema.convertToSpark(), schema.nullable)
-        },
-    )
-
-/**
- * Converts a ColumnSchema object to Spark DataType.
- *
- * @return The Spark DataType corresponding to the given ColumnSchema object.
- * @throws IllegalArgumentException if the column type or kind is unknown.
- */
-fun ColumnSchema.convertToSpark(): DataType =
-    when (this) {
-        is ColumnSchema.Value -> type.convertToSpark() ?: error("unknown data type: $type")
-        is ColumnSchema.Group -> schema.convertToSpark()
-        is ColumnSchema.Frame -> error("nested dataframes are not supported")
-        else -> error("unknown column kind: $this")
-    }
+package org.jetbrains.kotlinx.dataframe.examples.kotlinSpark
 
-/**
- * Returns the corresponding Spark DataType for a given Kotlin type.
- *
- * This list may be incomplete, but it can at least give you a good start.
- *
- * @return The Spark DataType that corresponds to the Kotlin type, or null if no matching DataType is found.
+/*
+ * See ../spark/compatibilityLayer.kt for the implementation.
+ * The compatibility layer is the same with and without the Kotlin Spark API.
  */
-fun KType.convertToSpark(): DataType? =
-    when {
-        isSubtypeOf(typeOf<Byte?>()) -> DataTypes.ByteType
-
-        isSubtypeOf(typeOf<Short?>()) -> DataTypes.ShortType
-
-        isSubtypeOf(typeOf<Int?>()) -> DataTypes.IntegerType
-
-        isSubtypeOf(typeOf<Long?>()) -> DataTypes.LongType
-
-        isSubtypeOf(typeOf<Boolean?>()) -> DataTypes.BooleanType
-
-        isSubtypeOf(typeOf<Float?>()) -> DataTypes.FloatType
-
-        isSubtypeOf(typeOf<Double?>()) -> DataTypes.DoubleType
-
-        isSubtypeOf(typeOf<String?>()) -> DataTypes.StringType
-
-        isSubtypeOf(typeOf<LocalDate?>()) -> DataTypes.DateType
-
-        isSubtypeOf(typeOf<Date?>()) -> DataTypes.DateType
-
-        isSubtypeOf(typeOf<Timestamp?>()) -> DataTypes.TimestampType
-
-        isSubtypeOf(typeOf<Instant?>()) -> DataTypes.TimestampType
-
-        isSubtypeOf(typeOf<Decimal?>()) -> DecimalType.SYSTEM_DEFAULT()
-
-        isSubtypeOf(typeOf<BigDecimal?>()) -> DecimalType.SYSTEM_DEFAULT()
-
-        isSubtypeOf(typeOf<BigInteger?>()) -> DecimalType.SYSTEM_DEFAULT()
-
-        isSubtypeOf(typeOf<CalendarInterval?>()) -> DataTypes.CalendarIntervalType
-
-        isSubtypeOf(nullableNothingType) -> DataTypes.NullType
-
-        isSubtypeOf(typeOf<ByteArray?>()) -> DataTypes.BinaryType
-
-        isSubtypeOf(typeOf<ShortArray?>()) -> DataTypes.createArrayType(DataTypes.ShortType, false)
-
-        isSubtypeOf(typeOf<IntArray?>()) -> DataTypes.createArrayType(DataTypes.IntegerType, false)
-
-        isSubtypeOf(typeOf<LongArray?>()) -> DataTypes.createArrayType(DataTypes.LongType, false)
-
-        isSubtypeOf(typeOf<FloatArray?>()) -> DataTypes.createArrayType(DataTypes.FloatType, false)
-
-        isSubtypeOf(typeOf<DoubleArray?>()) -> DataTypes.createArrayType(DataTypes.DoubleType, false)
-
-        isSubtypeOf(typeOf<BooleanArray?>()) -> DataTypes.createArrayType(DataTypes.BooleanType, false)
-
-        isSubtypeOf(typeOf<Array<*>>()) ->
-            error("non-primitive arrays are not supported for now, you can add it yourself")
-
-        isSubtypeOf(typeOf<List<*>>()) -> error("lists are not supported for now, you can add it yourself")
-
-        isSubtypeOf(typeOf<Set<*>>()) -> error("sets are not supported for now, you can add it yourself")
-
-        classifier == Map::class -> {
-            val (key, value) = arguments
-            DataTypes.createMapType(
-                key.type?.convertToSpark(),
-                value.type?.convertToSpark(),
-                value.type?.isMarkedNullable ?: true,
-            )
-        }
-
-        else -> null
-    }
-
-private val nullableNothingType: KType = typeOf<List<Nothing?>>().arguments.first().type!!
-
-// endregion
@@ -12,6 +12,9 @@ import org.jetbrains.kotlinx.dataframe.api.min
 import org.jetbrains.kotlinx.dataframe.api.print
 import org.jetbrains.kotlinx.dataframe.api.schema
 import org.jetbrains.kotlinx.dataframe.api.std
+import org.jetbrains.kotlinx.dataframe.examples.spark.convertToDataFrame
+import org.jetbrains.kotlinx.dataframe.examples.spark.convertToDataFrameByInference
+import org.jetbrains.kotlinx.dataframe.examples.spark.convertToSpark
 import org.jetbrains.kotlinx.spark.api.col
 import org.jetbrains.kotlinx.spark.api.gt
 import org.jetbrains.kotlinx.spark.api.withSpark
@@ -20,7 +23,7 @@ import org.jetbrains.kotlinx.spark.api.withSpark
  * Since we don't know the schema at compile time this time, we need to do
  * some schema mapping in between Spark and DataFrame.
  *
- * We will use compatibilityLayer.kt to do this.
+ * We will use spark/compatibilityLayer.kt to do this.
  *
  * NOTE: You will likely need to run this function with Java 8 or 11 for it to work correctly.
  * Use the `runKotlinSparkUntypedDataset` Gradle task to do so.