Skip to content

Commit 0aaee79

Browse files
Merge pull request #1072 from Kotlin/camel_case_fix
fix camelCase
2 parents a7c9c95 + ed23939 commit 0aaee79

File tree

11 files changed

+522
-58
lines changed

11 files changed

+522
-58
lines changed

core/api/core.api

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9948,8 +9948,6 @@ public final class org/jetbrains/kotlinx/dataframe/impl/UtilsKt {
99489948
public static final fun getColumnName (Lkotlin/reflect/KCallable;)Ljava/lang/String;
99499949
public static final fun getColumnName (Lkotlin/reflect/KFunction;)Ljava/lang/String;
99509950
public static final fun getColumnName (Lkotlin/reflect/KProperty;)Ljava/lang/String;
9951-
public static final fun getDELIMITED_STRING_REGEX ()Lkotlin/text/Regex;
9952-
public static final fun getDELIMITERS_REGEX ()Lkotlin/text/Regex;
99539951
public static final fun headPlusArray (B[B)[B
99549952
public static final fun headPlusArray (C[C)[C
99559953
public static final fun headPlusArray (D[D)[D
@@ -9958,7 +9956,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/UtilsKt {
99589956
public static final fun headPlusArray (J[J)[J
99599957
public static final fun headPlusArray (S[S)[S
99609958
public static final fun headPlusArray (Z[Z)[Z
9961-
public static final fun toCamelCaseByDelimiters (Ljava/lang/String;Lkotlin/text/Regex;)Ljava/lang/String;
9959+
public static final fun toCamelCaseByDelimiters (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;)Ljava/lang/String;
9960+
public static synthetic fun toCamelCaseByDelimiters$default (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
99629961
public static final fun zero (Lkotlin/reflect/KClass;)Ljava/lang/Number;
99639962
public static final fun zeroOrNull (Lkotlin/reflect/KClass;)Ljava/lang/Number;
99649963
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1313
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
1414
import org.jetbrains.kotlinx.dataframe.columns.renamedReference
1515
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
16-
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
17-
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
1816
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
1917
import org.jetbrains.kotlinx.dataframe.impl.columnName
2018
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
@@ -46,23 +44,44 @@ public class RenameClause<T, C>(internal val df: DataFrame<T>, internal val colu
4644
}
4745

4846
/**
49-
* ## Rename to camelCase
47+
* ## Rename to "camelCase"
48+
*
49+
* This function renames all columns in this [DataFrame] to the "camelCase" format.
50+
*
51+
* Removes all delimiters between words and capitalizes each word except the first one.
52+
* Adds an underscore between consecutive numbers.
53+
* If the string does not contain any letters or numbers, it remains unchanged.
54+
*
55+
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
56+
* into a consistent "camelCase" representation.
5057
*
51-
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
52-
* and converting the first char to lowercase.
53-
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
58+
* [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
59+
*
60+
* Returns a [DataFrame] with updated column names.
61+
*
62+
* ### Examples:
63+
* ```
64+
* "snake_case_name" -> "snakeCaseName"
65+
* "PascalCaseName" -> "pascalCaseName"
66+
* "doner-case-name" -> "donerCaseName"
67+
* "UPPER_CASE_NAME" -> "upperCaseName"
68+
* ```
69+
*
70+
* @return a [DataFrame] with column names converted to "camelCase" format.
5471
*/
5572
@Refine
5673
@Interpretable("RenameToCamelCase")
5774
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> =
5875
// recursively rename all columns at any depth to camel case
5976
rename {
60-
colsAtAnyDepth { it.name() matches DELIMITED_STRING_REGEX || it.name[0].isUpperCase() }
77+
colsAtAnyDepth()
6178
}.toCamelCase()
6279
// take all frame columns at any depth and call renameToCamelCase() on all dataframes inside
6380
.update {
6481
colsAtAnyDepth().colsOf<AnyFrame>()
65-
}.with { it.renameToCamelCase() }
82+
}.with {
83+
it.renameToCamelCase()
84+
}
6685

6786
@AccessApiOverload
6887
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
@@ -80,10 +99,27 @@ public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> Stri
8099
renameImpl(transform)
81100

82101
/**
83-
* ## Rename to camelCase
102+
* ## Rename to "camelCase"
84103
*
85-
* Renames the selected columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
86-
* and converting the first char to lowercase.
104+
* Renames the columns previously selected with [rename] to the "camelCase" format.
105+
* All delimiters between words are removed, words are capitalized except for the first one.
106+
* Places an underscore between consecutive numbers.
107+
* If the string does not contain any letters or numbers, it remains unchanged.
108+
*
109+
* Returns a [DataFrame] with updated column names.
110+
*
111+
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
112+
* into a consistent "camelCase" representation.
113+
*
114+
* ### Examples:
115+
* ```
116+
* "snake_case_name" -> "snakeCaseName"
117+
* "PascalCaseName" -> "pascalCaseName"
118+
* "doner-case-name" -> "donerCaseName"
119+
* "UPPER_CASE_NAME" -> "upperCaseName"
120+
* ```
121+
*
122+
* @return a [DataFrame] with column names converted to "camelCase" format.
87123
*/
88124
@Refine
89125
@Interpretable("RenameToCamelCaseClause")
@@ -96,15 +132,29 @@ public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into { it.ren
96132
/**
97133
* ## Rename to "camelCase"
98134
*
99-
* Renames this column to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
100-
* and converting the first char to lowercase.
135+
* Renames this column to "camelCase" format.
136+
* All delimiters between words are removed, words are capitalized except for the first one.
137+
* Places an underscore between consecutive numbers.
138+
* If the string does not contain any letters or numbers, it remains unchanged.
139+
*
140+
* Returns a [ColumnReference] with updated name.
141+
*
142+
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
143+
* into a consistent "camelCase" representation.
144+
*
145+
* ### Examples:
146+
* ```
147+
* "snake_case_name" -> "snakeCaseName"
148+
* "PascalCaseName" -> "pascalCaseName"
149+
* "doner-case-name" -> "donerCaseName"
150+
* "UPPER_CASE_NAME" -> "upperCaseName"
151+
* ```
152+
* @return a [ColumnReference] with the name converted to "camelCase" format.
101153
*/
102154
@Suppress("UNCHECKED_CAST")
103155
public fun <T, C : ColumnReference<T>> C.renameToCamelCase(): C =
104156
rename(
105-
this.name()
106-
.toCamelCaseByDelimiters(DELIMITERS_REGEX)
107-
.replaceFirstChar { it.lowercaseChar() },
157+
this.name().toCamelCaseByDelimiters(),
108158
) as C
109159

110160
@Suppress("UNCHECKED_CAST")

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt

Lines changed: 100 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -337,28 +337,118 @@ internal fun <T : Comparable<T>> T.between(left: T, right: T, includeBoundaries:
337337
this > left && this < right
338338
}
339339

340-
private const val DELIMITERS = "[_\\s]"
341-
public val DELIMITERS_REGEX: Regex = DELIMITERS.toRegex()
342-
public val DELIMITED_STRING_REGEX: Regex = ".+$DELIMITERS.+".toRegex()
340+
// Single regex to split words by non-alphanumeric characters, camelCase, and numbers
341+
internal val CAMEL_DEFAULT_DELIMITERS_REGEX =
342+
(
343+
"[^\\p{L}0-9]+|(?<=[\\p{Ll}])(?=[\\p{Lu}])|(?<=[\\p{Lu}])" +
344+
"(?=[\\p{Lu}][\\p{Ll}])|(?<=\\d)(?=[\\p{L}])|(?<=[\\p{L}])(?=\\d)"
345+
)
346+
.toRegex()
343347

344-
internal val CAMEL_REGEX = "(?<=[a-zA-Z])[A-Z]".toRegex()
348+
/**
349+
* Converts a string into lowerCamelCase using [delimiters].
350+
*
351+
* - Splits this string matching given [delimiters] regular expression
352+
* (by default, via [CAMEL_DEFAULT_DELIMITERS_REGEX] - runs of characters that are not letters or digits, as well as lowercase-to-uppercase, acronym-to-word, and letter/digit boundaries).
353+
* - If the string does not contain any letters or numbers, it remains unchanged.
354+
* - Places [numberSeparator] ("_" by default) between consecutive numbers (that were split before).
355+
* - The first word remains in lowercase, and subsequent words are capitalized.
356+
*
357+
* Default behavior (with [CAMEL_DEFAULT_DELIMITERS_REGEX]):
358+
*
359+
* ```
360+
* "hello_world" -> "helloWorld"
361+
* "HelloWorld" -> "helloWorld"
362+
* "json.parser.Config" -> "jsonParserConfig"
363+
* "my.var_name test" -> "myVarNameTest"
364+
* "thirdColumn" -> "thirdColumn"
365+
* "someHTMLParser" -> "someHtmlParser"
366+
* "RESTApi" -> "restApi"
367+
* "OAuth2Token" -> "oAuth2Token"
368+
* "GraphQLQuery" -> "graphQlQuery"
369+
* "TCP_3_PROTOCOL" -> "tcp3Protocol"
370+
* "123hello_world456" -> "123HelloWorld456"
371+
* "API_Response_2023" -> "apiResponse2023"
372+
* "UPPER_case-LOWER" -> "upperCaseLower"
373+
* "12parse34CamelCase" -> "12Parse34CamelCase"
374+
* "snake_case_example" -> "snakeCaseExample"
375+
* "dot.separated.words" -> "dotSeparatedWords"
376+
* "kebab-case-example" -> "kebabCaseExample"
377+
* "MIXED_Case_with_123Numbers" -> "mixedCaseWith123Numbers"
378+
* "___!!!___" -> "___!!!___"
379+
* "1000.2000.3000" -> "1000_2000_3000"
380+
* "UPPERCASE" -> "uppercase"
381+
* "alreadyCamelCased" -> "alreadyCamelCased"
382+
* "justNumbers123" -> "justNumbers123"
383+
* "Just_Special$Chars!!" -> "justSpecialChars"
384+
* "singleword" -> "singleword"
385+
* "word_with_underscores_and-dashes" -> "wordWithUnderscoresAndDashes"
386+
* "10-20-aa" -> "10_20Aa"
387+
* ```
388+
*
389+
* @return the formatted string in lowerCamelCase.
390+
*/
391+
public fun String.toCamelCaseByDelimiters(
392+
delimiters: Regex = CAMEL_DEFAULT_DELIMITERS_REGEX,
393+
numberSeparator: String = "_",
394+
): String =
395+
if (!this.any { it.isLetter() || it.isDigit() }) {
396+
this // If the string has no letters or digits, return it unchanged
397+
} else {
398+
split(delimiters)
399+
.filter { it.isNotBlank() }
400+
.map { it.lowercase() }
401+
.joinNumbers(numberSeparator)
402+
.joinToCamelCaseString()
403+
}
404+
405+
/**
406+
* Joins consecutive numbers in a list with the given [separator].
407+
* Assumes that all numbers and strings are separated (after splitting via [CAMEL_DEFAULT_DELIMITERS_REGEX]).
408+
*/
409+
private fun List<String>.joinNumbers(separator: CharSequence): List<String> {
410+
val result = mutableListOf<String>()
411+
var i = 0
412+
413+
while (i < this.size) {
414+
val current = this[i]
415+
if (current.all { it.isDigit() }) { // Check if the current element is a number
416+
val numberGroup = mutableListOf(current)
417+
while (i + 1 < this.size && this[i + 1].all { it.isDigit() }) {
418+
numberGroup.add(this[i + 1])
419+
i++
420+
}
421+
result.add(numberGroup.joinToString(separator)) // Join consecutive numbers with the given separator
422+
} else {
423+
result.add(current)
424+
}
425+
i++
426+
}
427+
return result
428+
}
345429

346-
public fun String.toCamelCaseByDelimiters(delimiters: Regex): String = split(delimiters).joinToCamelCaseString()
430+
/**
431+
* Joins a list of words into lowerCamelCase format.
432+
* - The first word is converted to lowercase.
433+
* - Subsequent words start with an uppercase letter.
434+
*/
435+
private fun List<String>.joinToCamelCaseString(): String =
436+
mapIndexed { index, word ->
437+
if (index == 0) word.lowercase() else word.replaceFirstChar { it.uppercaseChar() }
438+
}.joinToString("")
439+
440+
internal val CAMEL_LETTERS_REGEX = "(?<=[a-zA-Z])[A-Z]".toRegex()
347441

348442
internal fun String.toSnakeCase(): String =
349443
if ("[A-Z_]+".toRegex().matches(this)) {
350444
this
351445
} else {
352-
CAMEL_REGEX
446+
CAMEL_LETTERS_REGEX
353447
.replace(this) { "_${it.value}" }
354448
.replace(" ", "_")
355449
.lowercase()
356450
}
357451

358-
internal fun List<String>.joinToCamelCaseString(): String =
359-
joinToString(separator = "") { it.replaceFirstChar { it.uppercaseChar() } }
360-
.replaceFirstChar { it.lowercaseChar() }
361-
362452
/** Returns `true` if this callable is a getter-like function.
363453
*
364454
* A callable is considered getter-like if it is either a property getter,

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,13 @@ class RenameToCamelCaseTests {
179179
df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName")
180180
}
181181

182+
@Test
183+
fun `uppercase names`() {
184+
val originalDf = dataFrameOf("ID", "ITEM", "ORDER_DATE")(1, "TOY", "02.03.2009")
185+
val renamedDf = originalDf.renameToCamelCase()
186+
renamedDf.columnNames() shouldBe listOf("id", "item", "orderDate")
187+
}
188+
182189
@Test
183190
fun `doubly nested row`() {
184191
val doublyNestedColumnGroup = dataFrameOf("test_name")(
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package org.jetbrains.kotlinx.dataframe.impl
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.junit.Test
5+
6+
class ToCamelCase {
7+
@Test
8+
fun defaultDelimitersSimpleUseCases() {
9+
val testCases = listOf(
10+
"hello_world",
11+
"HelloWorld",
12+
"json.parser.Config",
13+
"my.var_name test",
14+
"thirdColumn",
15+
"someHTMLParser",
16+
"RESTApi",
17+
"OAuth2Token",
18+
"GraphQLQuery",
19+
"TCP_3_PROTOCOL",
20+
"123hello_world456",
21+
"API_Response_2023",
22+
"UPPER_case-LOWER",
23+
"12parse34CamelCase",
24+
"snake_case_example",
25+
"dot.separated.words",
26+
"kebab-case-example",
27+
"MIXED_Case_with_123Numbers",
28+
"___!!!___",
29+
"1000.2000.3000",
30+
"UPPERCASE",
31+
"alreadyCamelCased",
32+
"justNumbers123",
33+
"Just_Special\$Chars!!",
34+
"singleword",
35+
"word_with_underscores_and-dashes",
36+
"10-20-aa",
37+
"ROOM_1.11",
38+
)
39+
val expected = listOf(
40+
"helloWorld",
41+
"helloWorld",
42+
"jsonParserConfig",
43+
"myVarNameTest",
44+
"thirdColumn",
45+
"someHtmlParser",
46+
"restApi",
47+
"oAuth2Token",
48+
"graphQlQuery",
49+
"tcp3Protocol",
50+
"123HelloWorld456",
51+
"apiResponse2023",
52+
"upperCaseLower",
53+
"12Parse34CamelCase",
54+
"snakeCaseExample",
55+
"dotSeparatedWords",
56+
"kebabCaseExample",
57+
"mixedCaseWith123Numbers",
58+
"___!!!___",
59+
"1000_2000_3000",
60+
"uppercase",
61+
"alreadyCamelCased",
62+
"justNumbers123",
63+
"justSpecialChars",
64+
"singleword",
65+
"wordWithUnderscoresAndDashes",
66+
"10_20Aa",
67+
"room1_11",
68+
)
69+
70+
testCases.zip(expected).forEach { (input, expected) ->
71+
input.toCamelCaseByDelimiters() shouldBe expected
72+
}
73+
}
74+
75+
@Test
76+
fun specialCharacters() {
77+
"música_lírica".toCamelCaseByDelimiters() shouldBe "músicaLírica"
78+
"тут был Андрей".toCamelCaseByDelimiters() shouldBe "тутБылАндрей"
79+
"汉字_拼音".toCamelCaseByDelimiters() shouldBe "汉字拼音"
80+
"X Æ A-12 34".toCamelCaseByDelimiters() shouldBe "xÆA12_34"
81+
"kæt_wɪð_æk!t".toCamelCaseByDelimiters() shouldBe "kætWɪðÆkT"
82+
"Gëëxplodeerd,_of_geïntegreerd?".toCamelCaseByDelimiters() shouldBe "gëëxplodeerdOfGeïntegreerd"
83+
"Äüßergewöhnlich_könnte_flüssig_sein,_aber_wie_öfter?".toCamelCaseByDelimiters() shouldBe
84+
"äüßergewöhnlichKönnteFlüssigSeinAberWieÖfter"
85+
}
86+
}

0 commit comments

Comments
 (0)