Skip to content

Commit

Permalink
Add extension functions to iterate over code points (#38)
Browse files Browse the repository at this point in the history
Co-authored-by: cketti <ck@cketti.de>
  • Loading branch information
OptimumCode and cketti authored Jun 24, 2024
1 parent 91bed49 commit bd63005
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ package de.cketti.codepoints.deluxe

import de.cketti.codepoints.codePointAt as intCodePointAt
import de.cketti.codepoints.codePointBefore as intCodePointBefore
import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint
import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed

/**
* Returns the Unicode code point at the specified index.
Expand Down Expand Up @@ -51,3 +53,24 @@ fun CharSequence.codePointSequence(): CodePointSequence {
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    // Delegates to CodePointIterator, passing the receiver and the requested index range unchanged.
    CodePointIterator(this, startIndex, endIndex)

/**
 * Invokes [action] once for every code point in this character sequence, passing each one as a [CodePoint].
 */
inline fun CharSequence.forEachCodePoint(action: (codePoint: CodePoint) -> Unit) {
    // Delegate to the Int-based variant and wrap each raw value before handing it to the caller.
    intForEachCodePoint { action(it.toCodePoint()) }
}

/**
 * Invokes [action] once for every code point in this character sequence, together with its start index.
 *
 * @param action Callback that receives the start index of the current code point as its first argument and the
 * code point itself, wrapped in a [CodePoint] instance, as its second argument.
 */
inline fun CharSequence.forEachCodePointIndexed(action: (index: Int, codePoint: CodePoint) -> Unit) {
    // Delegate to the Int-based variant; only the code point value needs converting, the index passes through.
    intForEachCodePointIndexed { startIndex, rawCodePoint ->
        val wrapped = rawCodePoint.toCodePoint()
        action(startIndex, wrapped)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,53 @@ class CharSequenceExtensionsTest {
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(1))
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(3))
}

@Test
fun forEachCodepoint() {
    // Gathers every code point reported by forEachCodePoint, in callback order.
    fun CharSequence.gatherCodePoints(): List<CodePoint> {
        val visited = mutableListOf<CodePoint>()
        forEachCodePoint { visited.add(it) }
        return visited
    }

    // Each case pairs an input string with the code points it is expected to yield:
    // empty input, a single BMP char, the highest BMP char, and surrogate pairs around a BMP char.
    val cases: List<Pair<String, List<CodePoint>>> = listOf(
        "" to emptyList<CodePoint>(),
        "a" to listOf('a'.toCodePoint()),
        "a\uFFFF" to listOf('a'.toCodePoint(), 0xFFFF.toCodePoint()),
        "\uD83E\uDD95a\uD83E\uDD96" to listOf(0x1F995.toCodePoint(), 'a'.toCodePoint(), 0x1F996.toCodePoint()),
    )

    for ((input, expected) in cases) {
        assertEquals(expected, input.gatherCodePoints())
    }
}

@Test
fun forEachCodepointIndexed() {
    // Gathers every (start index, code point) pair reported by forEachCodePointIndexed, in callback order.
    fun CharSequence.gatherIndexedCodePoints(): List<Pair<Int, CodePoint>> {
        val visited = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed { startIndex, value -> visited.add(startIndex to value) }
        return visited
    }

    val letterA = 'a'.toCodePoint()
    val firstSupplementary = 0x1F995.toCodePoint()
    val secondSupplementary = 0x1F996.toCodePoint()

    // Empty input: the callback is never invoked.
    assertEquals(emptyList<Pair<Int, CodePoint>>(), "".gatherIndexedCodePoints())

    // Single BMP character.
    assertEquals(listOf(0 to letterA), "a".gatherIndexedCodePoints())

    // BMP character followed by a surrogate pair.
    assertEquals(
        listOf(0 to letterA, 1 to firstSupplementary),
        "a\uD83E\uDD95".gatherIndexedCodePoints(),
    )

    // A surrogate pair occupies two chars, so the index after it advances by two.
    assertEquals(
        listOf(0 to firstSupplementary, 2 to letterA, 3 to secondSupplementary),
        "\uD83E\uDD95a\uD83E\uDD96".gatherIndexedCodePoints(),
    )
}
}
40 changes: 40 additions & 0 deletions kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,43 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
return currentIndex + 1
}
}

/**
 * Invokes [action] once for every code point in this character sequence, passing the code point value.
 */
inline fun CharSequence.forEachCodePoint(action: (codePoint: Int) -> Unit) {
    // The start index reported by the indexed variant is not needed here, so it is discarded.
    forEachCodePointIndexed { _, value -> action(value) }
}

/**
 * Invokes [action] once for every code point in this character sequence, together with its start index.
 *
 * A high surrogate immediately followed by a low surrogate is reported as one combined code point. Any other
 * char, including a surrogate that is not part of such a pair, is reported on its own using its char code.
 *
 * @param action Callback that receives the start index of the current code point as its first argument and the
 * code point value as its second argument.
 */
inline fun CharSequence.forEachCodePointIndexed(action: (index: Int, codePoint: Int) -> Unit) {
    val limit = length
    var position = 0
    while (position < limit) {
        val start = position
        val lead = this[position++]

        // Default to the single char; upgraded below if it turns out to start a valid surrogate pair.
        var value = lead.code
        if (lead.isHighSurrogate() && position < limit) {
            val trail = this[position]
            if (trail.isLowSurrogate()) {
                // Consume the trailing surrogate so the next iteration starts after the pair.
                position++
                value = CodePoints.toCodePoint(lead, trail)
            }
        }

        action(start, value)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,53 @@ class CharSequenceExtensionsTest {
"\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2)
}
}

@Test
fun forEachCodepoint() {
    // Gathers every code point value reported by forEachCodePoint, in callback order.
    fun CharSequence.gatherCodePoints(): List<Int> {
        val visited = mutableListOf<Int>()
        forEachCodePoint { visited.add(it) }
        return visited
    }

    val letterA = 'a'.code

    // Empty input: the callback is never invoked.
    assertEquals(emptyList<Int>(), "".gatherCodePoints())

    // Single BMP character.
    assertEquals(listOf(letterA), "a".gatherCodePoints())

    // The highest BMP value is reported as a single code point.
    assertEquals(listOf(letterA, 0xFFFF), "a\uFFFF".gatherCodePoints())

    // Surrogate pairs on both sides of a BMP character are each combined into one code point.
    assertEquals(
        listOf(0x1F995, letterA, 0x1F996),
        "\uD83E\uDD95a\uD83E\uDD96".gatherCodePoints(),
    )
}

@Test
fun forEachCodepointIndexed() {
    // Gathers every (start index, code point value) pair reported by forEachCodePointIndexed, in callback order.
    fun CharSequence.gatherIndexedCodePoints(): List<Pair<Int, Int>> {
        val visited = mutableListOf<Pair<Int, Int>>()
        forEachCodePointIndexed { startIndex, value -> visited.add(startIndex to value) }
        return visited
    }

    // Each case pairs an input string with the (index, code point) pairs it is expected to yield.
    val cases: List<Pair<String, List<Pair<Int, Int>>>> = listOf(
        "" to emptyList<Pair<Int, Int>>(),
        "a" to listOf(0 to 'a'.code),
        "a\uD83E\uDD95" to listOf(0 to 'a'.code, 1 to 0x1F995),
        // A surrogate pair occupies two chars, so the index after it advances by two.
        "\uD83E\uDD95a\uD83E\uDD96" to listOf(
            0 to 0x1F995,
            2 to 'a'.code,
            3 to 0x1F996,
        ),
    )

    for ((input, expected) in cases) {
        assertEquals(expected, input.gatherIndexedCodePoints())
    }
}
}

0 comments on commit bd63005

Please sign in to comment.