Kotlin · AndreiKingsley · Feb 28, 2025 · Feb 25, 2025 · Feb 26, 2025 · Feb 26, 2025
diff --git a/core/api/core.api b/core/api/core.api
@@ -10077,7 +10077,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
 }
 
 public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt {
-	public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Ljava/lang/Iterable;
+	public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Lkotlin/sequences/Sequence;
 }
 
 public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt {

diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt
@@ -18,8 +18,6 @@ import org.jetbrains.kotlinx.dataframe.samples.api.city
 import org.jetbrains.kotlinx.dataframe.samples.api.firstName
 import org.jetbrains.kotlinx.dataframe.samples.api.lastName
 import org.jetbrains.kotlinx.dataframe.samples.api.name
-import org.jetbrains.kotlinx.dataframe.samples.api.secondName
-import org.jetbrains.kotlinx.dataframe.samples.api.thirdName
 import org.jetbrains.kotlinx.dataframe.samples.api.weight
 import org.junit.Test
 import kotlin.reflect.typeOf

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt
@@ -18,8 +18,6 @@ import org.jetbrains.kotlinx.dataframe.samples.api.city
 import org.jetbrains.kotlinx.dataframe.samples.api.firstName
 import org.jetbrains.kotlinx.dataframe.samples.api.lastName
 import org.jetbrains.kotlinx.dataframe.samples.api.name
-import org.jetbrains.kotlinx.dataframe.samples.api.secondName
-import org.jetbrains.kotlinx.dataframe.samples.api.thirdName
 import org.jetbrains.kotlinx.dataframe.samples.api.weight
 import org.junit.Test
 import kotlin.reflect.typeOf

diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt
@@ -20,6 +20,7 @@ import org.apache.poi.util.DefaultTempFileCreationStrategy
 import org.apache.poi.util.LocaleUtil
 import org.apache.poi.util.LocaleUtil.getUserTimeZone
 import org.apache.poi.util.TempFile
+import org.apache.poi.xssf.streaming.SXSSFWorkbook
 import org.apache.poi.xssf.usermodel.XSSFWorkbook
 import org.jetbrains.kotlinx.dataframe.AnyFrame
 import org.jetbrains.kotlinx.dataframe.AnyRow
@@ -470,6 +471,28 @@ private fun Cell?.cellValue(sheetName: String): Any? {
     return getValueFromType(cellType)
 }
 
+public enum class WorkBookType {
+    XLS,
+    XLSX,
+}
+
+/**
+ * Writes this DataFrame to an Excel file as a single sheet.
+ *
+ * Implemented with [Apache POI](https://poi.apache.org) using `HSSFWorkbook` for XLS files,
+ * `XSSFWorkbook` for standard XLSX files, and `SXSSFWorkbook` for memory-efficient streaming when creating new XLSX files.
+ *
+ * @param path The path to the file where the data will be written.
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file. The default is all columns.
+ * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
+ * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
+ * @param workBookType The [type of workbook][WorkBookType] to create (e.g., XLS or XLSX). Defaults to XLSX.
+ * @param keepFile If `true` and the file already exists, a new sheet will be appended instead of overwriting the file.
+ * This may result in higher memory usage and slower performance compared to creating a new file.
+ * Defaults to `false`.
+ *
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
+ */
 public fun <T> DataFrame<T>.writeExcel(
     path: String,
     columnsSelector: ColumnsSelector<T, *> = { all() },
@@ -479,11 +502,24 @@ public fun <T> DataFrame<T>.writeExcel(
     keepFile: Boolean = false,
 ): Unit = writeExcel(File(path), columnsSelector, sheetName, writeHeader, workBookType, keepFile)
 
-public enum class WorkBookType {
-    XLS,
-    XLSX,
-}
-
+/**
+ * Writes this DataFrame to an Excel file as a single sheet.
+ *
+ * Implemented with [Apache POI](https://poi.apache.org) using `HSSFWorkbook` for XLS files,
+ * `XSSFWorkbook` for standard XLSX files,
+ * and `SXSSFWorkbook` for memory-efficient streaming when creating new XLSX files.
+ *
+ * @param file The file where the data will be written.
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file. The default is all columns.
+ * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
+ * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
+ * @param workBookType The [type of workbook][WorkBookType] to create (e.g., XLS or XLSX). Defaults to XLSX.
+ * @param keepFile If `true` and the file already exists, a new sheet will be appended instead of overwriting the file.
+ * This may result in higher memory usage and slower performance compared to creating a new file.
+ * Defaults to `false`.
+ *
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
+ */
 public fun <T> DataFrame<T>.writeExcel(
     file: File,
     columnsSelector: ColumnsSelector<T, *> = { all() },
@@ -493,22 +529,41 @@ public fun <T> DataFrame<T>.writeExcel(
     keepFile: Boolean = false,
 ) {
     val factory =
-        if (keepFile) {
+        // Write to an existing file with `keepFile` flag
+        if (keepFile && file.exists() && file.length() > 0L) {
+            val fis = file.inputStream()
             when (workBookType) {
-                WorkBookType.XLS -> HSSFWorkbook(file.inputStream())
-                WorkBookType.XLSX -> XSSFWorkbook(file.inputStream())
+                WorkBookType.XLS -> HSSFWorkbook(fis)
+                WorkBookType.XLSX -> XSSFWorkbook(fis)
             }
         } else {
             when (workBookType) {
                 WorkBookType.XLS -> HSSFWorkbook()
-                WorkBookType.XLSX -> XSSFWorkbook()
+
+                // Use streaming mode for a new XLSX file
+                WorkBookType.XLSX -> SXSSFWorkbook()
             }
         }
     return file.outputStream().use {
         writeExcel(it, columnsSelector, sheetName, writeHeader, factory)
     }
 }
 
+/**
+ * Writes this DataFrame to an Excel file using an existing [Workbook] instance into given [OutputStream].
+ *
+ * Uses [Apache POI](https://poi.apache.org).
+ * Supports [XSSFWorkbook] and [SXSSFWorkbook] for XLSX and [HSSFWorkbook] for XLS,
+ * and allows users to manage the workbook externally.
+ *
+ * @param outputStream The output stream where the Excel data will be written.
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file. The default is all columns.
+ * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
+ * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
+ * @param factory The [Workbook] instance, allowing integration with an existing workbook.
+ *
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
+ */
 public fun <T> DataFrame<T>.writeExcel(
     outputStream: OutputStream,
     columnsSelector: ColumnsSelector<T, *> = { all() },
@@ -522,6 +577,25 @@ public fun <T> DataFrame<T>.writeExcel(
     wb.close()
 }
 
+/**
+ * Creates a new [Sheet] in the given [Workbook] and writes this DataFrame content into it.
+ *
+ * Uses [Apache POI](https://poi.apache.org).
+ * Supports [XSSFWorkbook] and [SXSSFWorkbook] for XLSX and [HSSFWorkbook] for XLS,
+ * and allows users to manage the workbook externally.
+ *
+ * Automatically handles datetime types.
+ * Skips null values to prevent Apache POI from treating empty cells incorrectly.
+ *
+ * @param wb The [Workbook] where the sheet will be created.
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include. Defaults to all columns.
+ * @param sheetName The name of the sheet. If null, a default sheet name is used.
+ * @param writeHeader Whether to include a header row with column names. Defaults to true.
+ *
+ * @return The created [Sheet] instance containing the DataFrame data.
+ *
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
+ */
 public fun <T> DataFrame<T>.writeExcel(
     wb: Workbook,
     columnsSelector: ColumnsSelector<T, *> = { all() },