@@ -7,6 +7,8 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
77import org.jetbrains.kotlinx.dataframe.DataFrame
88import org.jetbrains.kotlinx.dataframe.DataRow
99import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
10+ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
11+ import org.jetbrains.kotlinx.dataframe.annotations.Refine
1012import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1113import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1214import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
@@ -18,10 +20,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
1820import org.jetbrains.kotlinx.dataframe.impl.asList
1921import org.jetbrains.kotlinx.dataframe.impl.columnName
2022import org.jetbrains.kotlinx.dataframe.impl.getListType
23+ import org.jetbrains.kotlinx.dataframe.util.SPLIT_STR
2124import kotlin.reflect.KProperty
2225import kotlin.reflect.KType
2326import kotlin.reflect.typeOf
2427
/**
 * Entry point of the split operation: wraps this [DataFrame] and the [columns] selector into a [Split].
 *
 * Nothing is computed here; the returned [Split] only carries the frame and the selector
 * and is meant to be continued with one of the `by` / `match` / `into` / `inward` / `intoRows` / `inplace` calls.
 *
 * @param columns selector of the columns to split; the selected columns may hold nullable values (`C?`).
 * @return a [Split] built from this frame and [columns].
 */
28+ @Interpretable(" Split0" )
2529public fun <T , C > DataFrame<T>.split (columns : ColumnsSelector <T , C ?>): Split <T , C > = Split (this , columns)
2630
/**
 * String-name overload of `split`: turns the given column names into a column set with `toColumnSet()`
 * and delegates to the selector-based `split`.
 *
 * @param columns names of the columns to split.
 * @return a [Split] whose column type is erased to [Any].
 */
2731public fun <T > DataFrame<T>.split (vararg columns : String ): Split <T , Any > = split { columns.toColumnSet() }
@@ -62,22 +66,27 @@ public typealias ColumnNamesGenerator<C> = ColumnWithPath<C>.(extraColumnIndex:
6266
6367// region default
6468
/**
 * Sets a default value for a split over columns that already hold [Iterable] values.
 *
 * The cell itself is used as the split result (`by { it }`), and [value] is then stored
 * through the [SplitWithTransform] `default` overload.
 *
 * NOTE(review): presumably [value] fills cells for rows that yield fewer elements than there are
 * target columns; that logic is not visible in this function, confirm in the `into` implementation.
 *
 * @param value default element, may be `null`.
 */
69+ @Interpretable(" SplitDefault" )
6570public inline fun <T , C : Iterable <R >, reified R > Split <T , C >.default (value : R ? ): SplitWithTransform <T , C , R > =
6671 by { it }.default(value)
6772
/**
 * Deprecated shortcut that splits a [String] column with the implicit `splitDefault()` splitter
 * (defined elsewhere) and then sets [value] as the default.
 *
 * The deprecation suggests spelling the delimiter explicitly: `by(",").default(value)`.
 *
 * @param value default element, may be `null`.
 */
73+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").default(value)""" ))
6874public fun <T > Split <T , String >.default (value : String? ): SplitWithTransform <T , String , String > =
6975 by { it.splitDefault() }.default(value)
7076
/**
 * Returns a copy of this [SplitWithTransform] with its `default` set to [value].
 *
 * @param value default element, may be `null`.
 */
77+ @Interpretable(" SplitWithTransformDefault" )
7178public fun <T , C , R > SplitWithTransform <T , C , R >.default (value : R ? ): SplitWithTransform <T , C , R > = copy(default = value)
7279
7380// endregion
7481
7582// region by
7683
/**
 * Specifies how each cell of the selected columns is split into parts.
 *
 * The element type [R] is reified, captured as a `KType` with `typeOf<R>()`,
 * and passed together with [splitter] to the `by` overload that takes an explicit type.
 *
 * @param splitter function invoked with the [DataRow] as receiver and the cell value as argument;
 * returns the parts the value is split into.
 */
84+ @Interpretable(" ByIterable" )
7785public inline fun <T , C , reified R > Split <T , C >.by (
7886 noinline splitter : DataRow <T >.(C ) -> Iterable <R >,
7987): SplitWithTransform <T , C , R > = by(typeOf<R >(), splitter)
8088
89+ @Interpretable(" ByCharDelimiters" )
8190public fun <T , C > Split <T , C >.by (
8291 vararg delimiters : Char ,
8392 trim : Boolean = true,
@@ -90,6 +99,22 @@ public fun <T, C> Split<T, C>.by(
9099 }
91100 }
92101
102+ /* *
103+ * Example:
104+ * ```
105+ * dataFrameOf("str" to listOf("1 2 3 4"))
106+ * .split("str").by("""\s+""".toRegex())
107+ * // when the list of explicitly specified columnNames is not long enough (or none at all),
108+ * // names for additional columns are generated
109+ * .into()
110+ * ```
111+ * Result:
112+ * ```
113+ * split1 split2 split3 split4
114+ * 1 2 3 4
115+ * ```
116+ */
117+ @Interpretable(" ByRegex" )
93118public fun <T , C > Split <T , C >.by (
94119 regex : Regex ,
95120 trim : Boolean = true,
@@ -101,6 +126,7 @@ public fun <T, C> Split<T, C>.by(
101126 }
102127 }
103128
129+ @Interpretable(" ByStringDelimiters" )
104130public fun <T , C > Split <T , C >.by (
105131 vararg delimiters : String ,
106132 trim : Boolean = true,
@@ -126,10 +152,34 @@ internal inline fun <T, C, R> Split<T, C>.by(
126152
127153// region match
128154
155+ /* *
156+ * Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
157+ * excluding the first group, which is the entire matched String.
158+ * Example:
159+ * ```
160+ * dataFrameOf("str" to listOf("100 ml", "1 L"))
161+ * .split { "str"<String>() }.match("""(\d+)\s*(ml|l|L)""").into("volume", "unit")
162+ * ```
163+ * Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
164+ * Check the [Split.by] overload with a regex parameter if you're looking to split a String value by a [Regex] delimiter.
 *
 * This overload converts the [regex] pattern with `toRegex()` and delegates to the `match` overload taking a [Regex].
 *
 * @param regex regular expression pattern; use a raw string (`"""..."""`) or doubled backslashes,
 * since `\d` and `\s` are not valid escapes in a regular Kotlin string literal.
165+ */
166+ @Interpretable(" MatchStringRegex" )
129167public fun <T , C : String ?> Split <T , C >.match (
130168 @Language(" RegExp" ) regex : String ,
131169): SplitWithTransform <T , C , String ?> = match(regex.toRegex())
132170
171+ /* *
172+ * Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
173+ * excluding the first group, which is the entire matched String.
174+ * Example:
175+ * ```
176+ * dataFrameOf("str" to listOf("100 ml", "1 L"))
177+ * .split { "str"<String>() }.match("""(\d+)\s*(ml|l|L)""").into("volume", "unit")
178+ * ```
179+ * Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
180+ * Check the [Split.by][org.jetbrains.kotlinx.dataframe.api.Split.by] overload with a regex parameter if you're looking to split a String value by a [Regex] delimiter.
181+ */
182+ @Interpretable(" MatchRegex" )
133183public fun <T , C : String ?> Split <T , C >.match (regex : Regex ): SplitWithTransform <T , C , String ?> =
134184 by {
135185 it?.let {
@@ -171,6 +221,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
171221 vararg otherNames : KProperty <* >,
172222): DataFrame <T > = into(listOf (firstName.columnName) + otherNames.map { it.columnName })
173223
224+ @Refine
225+ @Interpretable(" SplitWithTransformInto0" )
174226public fun <T , C , R > SplitWithTransform <T , C , R >.into (
175227 vararg names : String ,
176228 extraNamesGenerator : (ColumnWithPath <C >.(extraColumnIndex: Int ) -> String )? = null,
@@ -188,6 +240,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
188240 }
189241 }
190242
243+ @Refine
244+ @Interpretable(" SplitIterableInto" )
191245public fun <T , C : Iterable <* >> Split <T , C >.into (
192246 vararg names : String ,
193247 extraNamesGenerator : ColumnNamesGenerator <C >? = null,
@@ -199,6 +253,8 @@ public fun <T, C> Split<T, DataFrame<C>>.into(
199253 extraNamesGenerator : ColumnNamesGenerator <DataFrame <C >>? = null,
200254): DataFrame <T > = by { it.rows() }.into(names.toList(), extraNamesGenerator)
201255
/**
 * Splits columns of [Pair] values into two columns named [firstCol] and [secondCol].
 *
 * Each pair is turned into the two-element list `[first, second]`, which is then passed to
 * the name-based `into` of [SplitWithTransform].
 *
 * @param firstCol name for the column receiving `Pair.first`.
 * @param secondCol name for the column receiving `Pair.second`.
 */
256+ @Refine
257+ @Interpretable(" SplitPair" )
202258public fun <T , A , B > Split <T , Pair <A , B >>.into (firstCol : String , secondCol : String ): DataFrame <T > =
203259 by { listOf (it.first, it.second) }.into(firstCol, secondCol)
204260
@@ -211,6 +267,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.into(
211267 secondCol : ColumnAccessor <B >,
212268): DataFrame <T > = by { listOf (it.first, it.second) }.into(firstCol, secondCol)
213269
270+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").into(*names, extraNamesGenerator = extraNamesGenerator)""" ))
214271@JvmName(" intoTC" )
215272public fun <T > Split <T , String >.into (
216273 vararg names : String ,
@@ -226,6 +283,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.inward(
226283 extraNamesGenerator : ColumnNamesGenerator <C >? = null,
227284): DataFrame <T > = copy(inward = true ).into(names.toList(), extraNamesGenerator)
228285
286+ @Refine
287+ @Interpretable(" SplitWithTransformInward0" )
229288public fun <T , C , R > SplitWithTransform <T , C , R >.inward (
230289 vararg names : String ,
231290 extraNamesGenerator : ColumnNamesGenerator <C >? = null,
@@ -272,6 +331,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.inward(
272331 secondCol : ColumnAccessor <B >,
273332): DataFrame <T > = by { listOf (it.first, it.second) }.inward(firstCol, secondCol)
274333
334+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").inward(*names, extraNamesGenerator = extraNamesGenerator)""" ))
275335@JvmName(" inwardTC" )
276336public fun <T > Split <T , String >.inward (
277337 vararg names : String ,
@@ -282,6 +342,8 @@ public fun <T> Split<T, String>.inward(
282342
283343// region intoColumns
284344
345+ @Refine
346+ @Interpretable(" SplitAnyFrameIntoColumns" )
285347public fun <T , C : AnyFrame > Split <T , C >.intoColumns (): DataFrame <T > =
286348 df.convert(columns).with {
287349 when {
@@ -296,11 +358,15 @@ public fun <T, C : AnyFrame> Split<T, C>.intoColumns(): DataFrame<T> =
296358// region intoRows
297359
/**
 * Splits columns that already hold [Iterable] values into rows.
 *
 * The cell itself is used as the split result (`by { it }`); the work is done by the
 * [SplitWithTransform] `intoRows` overload, to which [dropEmpty] is forwarded unchanged.
 *
 * @param dropEmpty forwarded to the [SplitWithTransform] `intoRows`, which passes it on to `explode`;
 * presumably controls whether rows with empty collections are dropped (confirm in `explode`).
 */
298360@JvmName(" intoRowsTC" )
361+ @Refine
362+ @Interpretable(" SplitIntoRows" )
299363public inline fun <T , C : Iterable <R >, reified R > Split <T , C >.intoRows (dropEmpty : Boolean = true): DataFrame <T > =
300364 by { it }
301365 .intoRows(dropEmpty)
302366
/**
 * Splits columns holding data frames into rows.
 *
 * Each nested frame is split into its `rows()`; the work is done by the
 * [SplitWithTransform] `intoRows` overload, to which [dropEmpty] is forwarded unchanged.
 *
 * @param dropEmpty forwarded to the [SplitWithTransform] `intoRows`, which passes it on to `explode`;
 * presumably controls whether rows with empty frames are dropped (confirm in `explode`).
 */
303367@JvmName(" intoRowsFrame" )
368+ @Refine
369+ @Interpretable(" SplitAnyFrameRows" )
304370public fun <T , C : AnyFrame > Split <T , C >.intoRows (dropEmpty : Boolean = true): DataFrame <T > =
305371 by { it.rows() }.intoRows(dropEmpty)
306372
@@ -309,6 +375,8 @@ internal inline fun <T, C, R> Convert<T, C?>.splitInplace(
309375 crossinline transform : DataRow <T >.(C ) -> Iterable <R >,
310376) = withRowCellImpl(getListType(type), Infer .None ) { if (it == null ) emptyList() else transform(it).asList() }
311377
378+ @Refine
379+ @Interpretable(" SplitWithTransformIntoRows" )
312380public fun <T , C , R > SplitWithTransform <T , C , R >.intoRows (dropEmpty : Boolean = true): DataFrame <T > {
313381 val paths = df.getColumnPaths(columns).toColumnSet()
314382 return df.convert { paths as ColumnSet <C ?> }.splitInplace(tartypeOf, transform).explode(dropEmpty) { paths }
@@ -319,8 +387,12 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.intoRows(dropEmpty: Boolean = t
319387// region inplace
320388
/**
 * In-place split for columns that already hold [Iterable] values.
 *
 * The cell itself is used as the split result (`by { it }`), then the call is delegated
 * to the [SplitWithTransform] `inplace` overload.
 */
321389@JvmName(" inplaceTC" )
390+ @Refine
391+ @Interpretable(" SplitInplace" )
322392public inline fun <T , C : Iterable <R >, reified R > Split <T , C >.inplace (): DataFrame <T > = by { it }.inplace()
323393
/**
 * Replaces the values of the selected columns with the lists produced by `transform`.
 *
 * Runs `df.convert(columns)` followed by `splitInplace(tartypeOf, transform)`. As visible in
 * `splitInplace`, a `null` cell becomes an empty list and any other cell becomes
 * `transform(cell).asList()`, with the column typed as a list of `tartypeOf`.
 */
394+ @Refine
395+ @Interpretable(" SplitWithTransformInplace" )
324396public fun <T , C , R > SplitWithTransform <T , C , R >.inplace (): DataFrame <T > =
325397 df.convert(columns).splitInplace(tartypeOf, transform)
326398
0 commit comments