diff --git a/core/api/core.api b/core/api/core.api index b90da398fe..5e03f213bf 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -4351,7 +4351,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/DuplicateKt { public final class org/jetbrains/kotlinx/dataframe/api/DynamicDataFrameBuilder { public fun ()V + public fun (Z)V + public synthetic fun (ZILkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun add (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/String; + public final fun get (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public final fun toDataFrame ()Lorg/jetbrains/kotlinx/dataframe/DataFrame; } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 13c6ade1be..6f45886941 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -413,29 +413,78 @@ public class DataFrameBuilder(private val header: List) { } /** - * Helper class for implementing operations when column names can be potentially duplicated. - * For example, operations involving multiple dataframes, computed columns or parsing some third-party data + * A builder class for dynamically constructing a DataFrame with provided columns. + * Allows adding columns manually while automatically handling duplicate column names by assigning unique names. + * + * @property checkDuplicateValues Whether to check for duplicate column (with identical names and values) + * when adding new columns. `true` by default. 
*/ -public class DynamicDataFrameBuilder { - private var cols: MutableList = mutableListOf() +public class DynamicDataFrameBuilder(private val checkDuplicateValues: Boolean = true) { + private var cols: MutableMap = mutableMapOf() private val generator = ColumnNameGenerator() + /** + * Adds a column to the builder, ensuring its name is unique. + * + * - If a column with the same name already exists, the new column is renamed to a unique name. + * - If [checkDuplicateValues] is `true`, the method checks whether the new column has identical values + * to an existing column with the same name. If the values match, the column is not added. + * + * @param col The column to add to the DataFrame builder. + * @return The final unique name assigned to the column. + */ public fun add(col: AnyCol): String { - val uniqueName = if (col.name().isEmpty()) { + val originalName = col.name() + if (checkDuplicateValues && generator.contains(originalName)) { + if (cols[originalName] == col) return originalName + } + val uniqueName = if (originalName.isEmpty()) { generator.addUnique(UNNAMED_COLUMN_PREFIX) } else { - generator.addUnique(col.name()) + generator.addUnique(originalName) } - val renamed = if (uniqueName != col.name()) { + val renamed = if (uniqueName != originalName) { col.rename(uniqueName) } else { col } - cols.add(renamed) + cols.put(uniqueName, renamed) return uniqueName } - public fun toDataFrame(): DataFrame<*> = dataFrameOf(cols) + /** + * Adds a column to the builder from the given iterable of values, ensuring the column's name is unique. + * + * The method automatically converts the given iterable into a column using the specified or default name + * and infers the type of the column's elements. + * + * - If a column with the same name already exists, the new column is renamed to a unique name. 
+ * - If the [checkDuplicateValues] property of the builder is `true`, the method checks whether the new column + * has identical values to an existing column with the same name. If the values match, the column is not added. + * + * @param T The inferred type of the elements in the column. + * @param values The iterable collection of values to be added as a new column. + * @param name The name of the new column. If empty, a unique name will be generated automatically. + * @return The final unique name assigned to the column. + */ + public inline fun add(values: Iterable, name: String = ""): String = + add(values.toColumn(name, Infer.Type)) + + /** + * Retrieves a column from the builder by its name. + * + * @param column The name of the column to retrieve. + * @return The column corresponding to the specified name, or `null` if no such column exists. + */ + public fun get(column: String): AnyCol? = cols[column] + + /** + * Converts the current `DynamicDataFrameBuilder` instance into a `DataFrame`. + * The resulting `DataFrame` is constructed from the columns stored in the builder. + * + * @return A `DataFrame` containing the columns defined in the `DynamicDataFrameBuilder`. 
+ */ + public fun toDataFrame(): DataFrame<*> = cols.values.toDataFrame() } /** diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 36a79170c0..a09b476532 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -31,12 +31,39 @@ class ConstructorsTests { @Test fun `duplicated name`() { val builder = DynamicDataFrameBuilder() - val column by columnOf(1, 2, 3) - builder.add(column) - builder.add(column) + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + builder.add(columnB) val df = builder.toDataFrame() df.columnsCount() shouldBe 2 - df.columnNames() shouldBe listOf(column.name(), "${column.name()}1") + df.columnNames() shouldBe listOf(columnName, "${columnName}1") + } + + @Test + fun `get by new name`() { + val builder = DynamicDataFrameBuilder() + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + val newName = builder.add(columnB) + builder.get(newName)!!.values shouldBe columnB.values + } + + @Test + fun `duplicated column`() { + val builder = DynamicDataFrameBuilder() + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + builder.add(columnB) + builder.add(columnA) + val df = builder.toDataFrame() + df.columnsCount() shouldBe 2 + df.columnNames() shouldBe listOf(columnName, "${columnName}1") } @Test diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt 
index 13c6ade1be..81ae8ee725 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -413,29 +413,79 @@ public class DataFrameBuilder(private val header: List) { } /** - * Helper class for implementing operations when column names can be potentially duplicated. - * For example, operations involving multiple dataframes, computed columns or parsing some third-party data + * A builder class for dynamically constructing a DataFrame with provided columns. + * Allows adding columns manually while automatically handling duplicate column names by assigning unique names. + * + * @property checkDuplicateValues Whether to check for duplicate columns (with identical names and values) + * when adding new columns. If `true`, a new column is not added when an identical one is already in the builder. + * `true` by default. */ -public class DynamicDataFrameBuilder { - private var cols: MutableList = mutableListOf() +public class DynamicDataFrameBuilder(private val checkDuplicateValues: Boolean = true) { + private var cols: MutableMap = mutableMapOf() private val generator = ColumnNameGenerator() + /** + * Adds a column to the builder, ensuring its name is unique. + * + * - If a column with the same name already exists, the new column is renamed to a unique name. + * - If [checkDuplicateValues] is `true`, the method checks whether the new column has identical values + * to an existing column with the same name. If the values match, the column is not added. + * + * @param col The column to add to the DataFrame builder. + * @return The final unique name assigned to the column. 
+ */ public fun add(col: AnyCol): String { - val uniqueName = if (col.name().isEmpty()) { + val originalName = col.name() + if (checkDuplicateValues && generator.contains(originalName)) { + if (cols[originalName] == col) return originalName + } + val uniqueName = if (originalName.isEmpty()) { generator.addUnique(UNNAMED_COLUMN_PREFIX) } else { - generator.addUnique(col.name()) + generator.addUnique(originalName) } - val renamed = if (uniqueName != col.name()) { + val renamed = if (uniqueName != originalName) { col.rename(uniqueName) } else { col } - cols.add(renamed) + cols.put(uniqueName, renamed) return uniqueName } - public fun toDataFrame(): DataFrame<*> = dataFrameOf(cols) + /** + * Adds a column to the builder from the given iterable of values, ensuring the column's name is unique. + * + * The method automatically converts the given iterable into a column using the specified or default name + * and infers the type of the column's elements. + * + * - If a column with the same name already exists, the new column is renamed to a unique name. + * - If the [checkDuplicateValues] property of the builder is `true`, the method checks whether the new column + * has identical values to an existing column with the same name. If the values match, the column is not added. + * + * @param T The inferred type of the elements in the column. + * @param values The iterable collection of values to be added as a new column. + * @param name The name of the new column. If empty, a unique name will be generated automatically. + * @return The final unique name assigned to the column. + */ + public inline fun add(values: Iterable, name: String = ""): String = + add(values.toColumn(name, Infer.Type)) + + /** + * Retrieves a column from the builder by its name. + * + * @param column The name of the column to retrieve. + * @return The column corresponding to the specified name, or `null` if no such column exists. + */ + public fun get(column: String): AnyCol? 
= cols[column] + + /** + * Converts the current [DynamicDataFrameBuilder] instance into a [DataFrame]. + * The resulting [DataFrame] is constructed from the columns stored in the builder. + * + * @return A [DataFrame] containing the columns defined in the [DynamicDataFrameBuilder]. + */ + public fun toDataFrame(): DataFrame<*> = cols.values.toDataFrame() } /** diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 36a79170c0..a09b476532 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -31,12 +31,39 @@ class ConstructorsTests { @Test fun `duplicated name`() { val builder = DynamicDataFrameBuilder() - val column by columnOf(1, 2, 3) - builder.add(column) - builder.add(column) + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + builder.add(columnB) val df = builder.toDataFrame() df.columnsCount() shouldBe 2 - df.columnNames() shouldBe listOf(column.name(), "${column.name()}1") + df.columnNames() shouldBe listOf(columnName, "${columnName}1") + } + + @Test + fun `get by new name`() { + val builder = DynamicDataFrameBuilder() + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + val newName = builder.add(columnB) + builder.get(newName)!!.values shouldBe columnB.values + } + + @Test + fun `duplicated column`() { + val builder = DynamicDataFrameBuilder() + val columnName = "columnName" + val columnA = columnOf(1, 2, 3) named columnName + val columnB = columnOf(4, 5, 6) named columnName + builder.add(columnA) + builder.add(columnB) + builder.add(columnA) + val df = builder.toDataFrame() + df.columnsCount() shouldBe 2 + df.columnNames() 
shouldBe listOf(columnName, "${columnName}1") } @Test