diff --git a/src/commonImplementation/kotlin/CodePoints.kt b/src/commonImplementation/kotlin/CodePoints.kt
index dfff6a2..4803da3 100644
--- a/src/commonImplementation/kotlin/CodePoints.kt
+++ b/src/commonImplementation/kotlin/CodePoints.kt
@@ -2,48 +2,60 @@
 
 package de.cketti.codepoints
 
+import de.cketti.codepoints.internal.isValidCodePoint as commonIsValidCodePoint
+import de.cketti.codepoints.internal.isBmpCodePoint as commonIsBmpCodePoint
+import de.cketti.codepoints.internal.isSupplementaryCodePoint as commonIsSupplementaryCodePoint
+import de.cketti.codepoints.internal.charCount as commonCharCount
+import de.cketti.codepoints.internal.isSurrogate as commonIsSurrogate
+import de.cketti.codepoints.internal.isHighSurrogate as commonIsHighSurrogate
+import de.cketti.codepoints.internal.isLowSurrogate as commonIsLowSurrogate
+import de.cketti.codepoints.internal.isSurrogatePair as commonIsSurrogatePair
+import de.cketti.codepoints.internal.highSurrogate as commonHighSurrogate
+import de.cketti.codepoints.internal.lowSurrogate as commonLowSurrogate
+import de.cketti.codepoints.internal.toCodePoint as commonToCodePoint
+
 actual object CodePoints {
-    actual inline fun isValidCodePoint(codePoint: Int): Boolean {
-        return CommonCodePoints.isValidCodePoint(codePoint)
+    actual fun isValidCodePoint(codePoint: Int): Boolean {
+        return commonIsValidCodePoint(codePoint)
     }
 
-    actual inline fun isBmpCodePoint(codePoint: Int): Boolean {
-        return CommonCodePoints.isBmpCodePoint(codePoint)
+    actual fun isBmpCodePoint(codePoint: Int): Boolean {
+        return commonIsBmpCodePoint(codePoint)
     }
 
-    actual inline fun isSupplementaryCodePoint(codePoint: Int): Boolean {
-        return CommonCodePoints.isSupplementaryCodePoint(codePoint)
+    actual fun isSupplementaryCodePoint(codePoint: Int): Boolean {
+        return commonIsSupplementaryCodePoint(codePoint)
     }
 
-    actual inline fun charCount(codePoint: Int): Int {
-        return CommonCodePoints.charCount(codePoint)
+    actual fun charCount(codePoint: Int): Int {
+        return commonCharCount(codePoint)
     }
 
-    actual inline fun isSurrogate(char: Char): Boolean {
-        return CommonCodePoints.isSurrogate(char)
+    actual fun isSurrogate(char: Char): Boolean {
+        return commonIsSurrogate(char)
     }
 
-    actual inline fun isHighSurrogate(char: Char): Boolean {
-        return CommonCodePoints.isHighSurrogate(char)
+    actual fun isHighSurrogate(char: Char): Boolean {
+        return commonIsHighSurrogate(char)
     }
 
-    actual inline fun isLowSurrogate(char: Char): Boolean {
-        return CommonCodePoints.isLowSurrogate(char)
+    actual fun isLowSurrogate(char: Char): Boolean {
+        return commonIsLowSurrogate(char)
     }
 
-    actual inline fun isSurrogatePair(highSurrogate: Char, lowSurrogate: Char): Boolean {
-        return CommonCodePoints.isSurrogatePair(highSurrogate, lowSurrogate)
+    actual fun isSurrogatePair(highSurrogate: Char, lowSurrogate: Char): Boolean {
+        return commonIsSurrogatePair(highSurrogate, lowSurrogate)
     }
 
     actual fun highSurrogate(codePoint: Int): Char {
-        return CommonCodePoints.highSurrogate(codePoint)
+        return commonHighSurrogate(codePoint)
     }
 
     actual fun lowSurrogate(codePoint: Int): Char {
-        return CommonCodePoints.lowSurrogate(codePoint)
+        return commonLowSurrogate(codePoint)
     }
 
-    actual inline fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int {
-        return CommonCodePoints.toCodePoint(highSurrogate, lowSurrogate)
+    actual fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int {
+        return commonToCodePoint(highSurrogate, lowSurrogate)
     }
 }
diff --git a/src/commonImplementation/kotlin/CommonCodePoints.kt b/src/commonImplementation/kotlin/CommonCodePoints.kt
deleted file mode 100644
index 2546bd1..0000000
--- a/src/commonImplementation/kotlin/CommonCodePoints.kt
+++ /dev/null
@@ -1,61 +0,0 @@
-package de.cketti.codepoints
-
-object CommonCodePoints {
-    private const val MIN_SUPPLEMENTARY_CODE_POINT = 0x10000
-    private const val MAX_CODE_POINT = 0x10FFFF
-
-    private const val MIN_HIGH_SURROGATE = 0xD800
-    private const val MAX_HIGH_SURROGATE = 0xDBFF
-    private const val MIN_LOW_SURROGATE = 0xDC00
-    private const val MAX_LOW_SURROGATE = 0xDFFF
-
-    private const val SURROGATE_DECODE_OFFSET =
-        MIN_SUPPLEMENTARY_CODE_POINT - (MIN_HIGH_SURROGATE shl 10) - MIN_LOW_SURROGATE
-
-    private const val HIGH_SURROGATE_ENCODE_OFFSET =
-        (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10))
-
-    fun isValidCodePoint(codePoint: Int): Boolean {
-        return codePoint in 0..MAX_CODE_POINT
-    }
-
-    fun isBmpCodePoint(codePoint: Int): Boolean {
-        return codePoint ushr 16 == 0
-    }
-
-    fun isSupplementaryCodePoint(codePoint: Int): Boolean {
-        return codePoint in MIN_SUPPLEMENTARY_CODE_POINT..MAX_CODE_POINT
-    }
-
-    fun charCount(codePoint: Int): Int {
-        return if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) 1 else 2
-    }
-
-    fun isSurrogate(char: Char): Boolean {
-        return char.code in MIN_HIGH_SURROGATE..MAX_LOW_SURROGATE
-    }
-
-    fun isHighSurrogate(char: Char): Boolean {
-        return char.code in MIN_HIGH_SURROGATE..MAX_HIGH_SURROGATE
-    }
-
-    fun isLowSurrogate(char: Char): Boolean {
-        return char.code in MIN_LOW_SURROGATE..MAX_LOW_SURROGATE
-    }
-
-    fun isSurrogatePair(highSurrogate: Char, lowSurrogate: Char): Boolean {
-        return isHighSurrogate(highSurrogate) && isLowSurrogate(lowSurrogate)
-    }
-
-    fun highSurrogate(codePoint: Int): Char {
-        return ((codePoint ushr 10) + HIGH_SURROGATE_ENCODE_OFFSET).toChar()
-    }
-
-    fun lowSurrogate(codePoint: Int): Char {
-        return ((codePoint and 0x3FF) + MIN_LOW_SURROGATE).toChar()
-    }
-
-    fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int {
-        return (highSurrogate.code shl 10) + lowSurrogate.code + SURROGATE_DECODE_OFFSET
-    }
-}
diff --git a/src/commonImplementation/kotlin/CommonStringFunctions.kt b/src/commonImplementation/kotlin/CommonStringFunctions.kt
deleted file mode 100644
index 51b8c98..0000000
--- a/src/commonImplementation/kotlin/CommonStringFunctions.kt
+++ /dev/null
@@ -1,94 +0,0 @@
-package de.cketti.codepoints
-
-import de.cketti.codepoints.CommonCodePoints.isHighSurrogate
-import de.cketti.codepoints.CommonCodePoints.isLowSurrogate
-import de.cketti.codepoints.CommonCodePoints.toCodePoint
-
-object CommonStringFunctions {
-    fun codePointAt(text: String, index: Int): Int {
-        if (index !in text.indices) throw IndexOutOfBoundsException()
-
-        val firstChar = text[index]
-        if (isHighSurrogate(firstChar) && index + 1 < text.length) {
-            val nextChar = text[index + 1]
-            if (isLowSurrogate(nextChar)) {
-                return toCodePoint(firstChar, nextChar)
-            }
-        }
-
-        return firstChar.code
-    }
-
-    fun codePointBefore(text: String, index: Int): Int {
-        val startIndex = index - 1
-        if (startIndex !in text.indices) throw IndexOutOfBoundsException()
-
-        val firstChar = text[startIndex]
-        if (isLowSurrogate(firstChar) && startIndex - 1 >= 0) {
-            val previousChar = text[startIndex - 1]
-            if (isHighSurrogate(previousChar)) {
-                return toCodePoint(previousChar, firstChar)
-            }
-        }
-
-        return firstChar.code
-    }
-
-    fun codePointCount(text: String, beginIndex: Int, endIndex: Int): Int {
-        if (beginIndex < 0 || endIndex > text.length || beginIndex > endIndex) throw IndexOutOfBoundsException()
-
-        var index = beginIndex
-        var count = 0
-        do {
-            val firstChar = text[index]
-            index++
-            if (isHighSurrogate(firstChar) && index < endIndex) {
-                val nextChar = text[index]
-                if (isLowSurrogate(nextChar)) {
-                    index++
-                }
-            }
-
-            count++
-        } while (index < endIndex)
-
-        return count
-    }
-
-    fun offsetByCodePoints(text: String, index: Int, codePointOffset: Int): Int {
-        if (index !in 0..text.length) throw IndexOutOfBoundsException()
-        if (codePointOffset == 0) return index
-
-        if (codePointOffset > 0) {
-            var currentIndex = index
-            repeat(codePointOffset) {
-                if (currentIndex > text.lastIndex) throw IndexOutOfBoundsException()
-                val firstChar = text[currentIndex]
-                currentIndex++
-                if (isHighSurrogate(firstChar) && currentIndex <= text.lastIndex) {
-                    val nextChar = text[currentIndex]
-                    if (isLowSurrogate(nextChar)) {
-                        currentIndex++
-                    }
-                }
-            }
-
-            return currentIndex
-        } else {
-            var currentIndex = index - 1
-            repeat(-codePointOffset) {
-                if (currentIndex < 0) throw IndexOutOfBoundsException()
-                val firstChar = text[currentIndex]
-                currentIndex--
-                if (isLowSurrogate(firstChar) && currentIndex >= 0) {
-                    val previousChar = text[currentIndex]
-                    if (isHighSurrogate(previousChar)) {
-                        currentIndex--
-                    }
-                }
-            }
-
-            return currentIndex + 1
-        }
-    }
-}
diff --git a/src/commonImplementation/kotlin/StringExtensions.kt b/src/commonImplementation/kotlin/StringExtensions.kt
index eef41e2..b14af41 100644
--- a/src/commonImplementation/kotlin/StringExtensions.kt
+++ b/src/commonImplementation/kotlin/StringExtensions.kt
@@ -2,18 +2,23 @@
 
 package de.cketti.codepoints
 
-actual inline fun String.codePointAt(index: Int): Int {
-    return CommonStringFunctions.codePointAt(this, index)
+import de.cketti.codepoints.internal.codePointAt as commonCodePointAt
+import de.cketti.codepoints.internal.codePointBefore as commonCodePointBefore
+import de.cketti.codepoints.internal.codePointCount as commonCodePointCount
+import de.cketti.codepoints.internal.offsetByCodePoints as commonOffsetByCodePoints
+
+actual fun String.codePointAt(index: Int): Int {
+    return commonCodePointAt(this, index)
 }
 
-actual inline fun String.codePointBefore(index: Int): Int {
-    return CommonStringFunctions.codePointBefore(this, index)
+actual fun String.codePointBefore(index: Int): Int {
+    return commonCodePointBefore(this, index)
 }
 
-actual inline fun String.codePointCount(beginIndex: Int, endIndex: Int): Int {
-    return CommonStringFunctions.codePointCount(this, beginIndex, endIndex)
+actual fun String.codePointCount(beginIndex: Int, endIndex: Int): Int {
+    return commonCodePointCount(this, beginIndex, endIndex)
 }
 
-actual inline fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
-    return CommonStringFunctions.offsetByCodePoints(this, index, codePointOffset)
+actual fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
+    return commonOffsetByCodePoints(this, index, codePointOffset)
 }
diff --git a/src/commonImplementation/kotlin/internal/CommonCodePoints.kt b/src/commonImplementation/kotlin/internal/CommonCodePoints.kt
new file mode 100644
index 0000000..806daf0
--- /dev/null
+++ b/src/commonImplementation/kotlin/internal/CommonCodePoints.kt
@@ -0,0 +1,59 @@
+package de.cketti.codepoints.internal
+
+private const val MIN_SUPPLEMENTARY_CODE_POINT = 0x10000
+private const val MAX_CODE_POINT = 0x10FFFF
+
+private const val MIN_HIGH_SURROGATE = 0xD800
+private const val MAX_HIGH_SURROGATE = 0xDBFF
+private const val MIN_LOW_SURROGATE = 0xDC00
+private const val MAX_LOW_SURROGATE = 0xDFFF
+
+private const val SURROGATE_DECODE_OFFSET =
+    MIN_SUPPLEMENTARY_CODE_POINT - (MIN_HIGH_SURROGATE shl 10) - MIN_LOW_SURROGATE
+
+private const val HIGH_SURROGATE_ENCODE_OFFSET =
+    (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10))
+
+internal fun isValidCodePoint(codePoint: Int): Boolean {
+    return codePoint in 0..MAX_CODE_POINT
+}
+
+internal fun isBmpCodePoint(codePoint: Int): Boolean {
+    return codePoint ushr 16 == 0
+}
+
+internal fun isSupplementaryCodePoint(codePoint: Int): Boolean {
+    return codePoint in MIN_SUPPLEMENTARY_CODE_POINT..MAX_CODE_POINT
+}
+
+internal fun charCount(codePoint: Int): Int {
+    return if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) 1 else 2
+}
+
+internal fun isSurrogate(char: Char): Boolean {
+    return char.code in MIN_HIGH_SURROGATE..MAX_LOW_SURROGATE
+}
+
+internal fun isHighSurrogate(char: Char): Boolean {
+    return char.code in MIN_HIGH_SURROGATE..MAX_HIGH_SURROGATE
+}
+
+internal fun isLowSurrogate(char: Char): Boolean {
+    return char.code in MIN_LOW_SURROGATE..MAX_LOW_SURROGATE
+}
+
+internal fun isSurrogatePair(highSurrogate: Char, lowSurrogate: Char): Boolean {
+    return isHighSurrogate(highSurrogate) && isLowSurrogate(lowSurrogate)
+}
+
+internal fun highSurrogate(codePoint: Int): Char {
+    return ((codePoint ushr 10) + HIGH_SURROGATE_ENCODE_OFFSET).toChar()
+}
+
+internal fun lowSurrogate(codePoint: Int): Char {
+    return ((codePoint and 0x3FF) + MIN_LOW_SURROGATE).toChar()
+}
+
+internal fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int {
+    return (highSurrogate.code shl 10) + lowSurrogate.code + SURROGATE_DECODE_OFFSET
+}
diff --git a/src/commonImplementation/kotlin/internal/CommonStringFunctions.kt b/src/commonImplementation/kotlin/internal/CommonStringFunctions.kt
new file mode 100644
index 0000000..4fd6a23
--- /dev/null
+++ b/src/commonImplementation/kotlin/internal/CommonStringFunctions.kt
@@ -0,0 +1,123 @@
+package de.cketti.codepoints.internal
+
+/**
+ * Returns the Unicode code point at [index] in [text].
+ *
+ * A high surrogate at [index] that is directly followed by a low surrogate is decoded as one supplementary code
+ * point; in every other case the code of the single char at [index] is returned.
+ *
+ * @throws IndexOutOfBoundsException if [index] is not a valid index into [text].
+ */
+internal fun codePointAt(text: String, index: Int): Int {
+    if (index !in text.indices) throw IndexOutOfBoundsException()
+
+    val firstChar = text[index]
+    if (isHighSurrogate(firstChar) && index + 1 < text.length) {
+        val nextChar = text[index + 1]
+        if (isLowSurrogate(nextChar)) {
+            return toCodePoint(firstChar, nextChar)
+        }
+    }
+
+    return firstChar.code
+}
+
+/**
+ * Returns the Unicode code point that ends directly before [index] in [text].
+ *
+ * A low surrogate at `index - 1` that is directly preceded by a high surrogate is decoded as one supplementary code
+ * point; in every other case the code of the single char at `index - 1` is returned.
+ *
+ * @throws IndexOutOfBoundsException if `index - 1` is not a valid index into [text].
+ */
+internal fun codePointBefore(text: String, index: Int): Int {
+    val startIndex = index - 1
+    if (startIndex !in text.indices) throw IndexOutOfBoundsException()
+
+    val firstChar = text[startIndex]
+    if (isLowSurrogate(firstChar) && startIndex - 1 >= 0) {
+        val previousChar = text[startIndex - 1]
+        if (isHighSurrogate(previousChar)) {
+            return toCodePoint(previousChar, firstChar)
+        }
+    }
+
+    return firstChar.code
+}
+
+/**
+ * Counts the Unicode code points of [text] in the range from [beginIndex] (inclusive) to [endIndex] (exclusive).
+ *
+ * A high surrogate directly followed by a low surrogate inside the range counts as one code point; any other char,
+ * including an unpaired surrogate, counts as one code point on its own. An empty range contains zero code points.
+ *
+ * @throws IndexOutOfBoundsException if [beginIndex] is negative, [endIndex] is larger than the length of [text], or
+ * [beginIndex] is larger than [endIndex].
+ */
+internal fun codePointCount(text: String, beginIndex: Int, endIndex: Int): Int {
+    if (beginIndex < 0 || endIndex > text.length || beginIndex > endIndex) throw IndexOutOfBoundsException()
+
+    var index = beginIndex
+    var count = 0
+    // Pre-checked loop: an empty range (beginIndex == endIndex) must yield 0 without reading text[index].
+    while (index < endIndex) {
+        val firstChar = text[index]
+        index++
+        if (isHighSurrogate(firstChar) && index < endIndex) {
+            val nextChar = text[index]
+            if (isLowSurrogate(nextChar)) {
+                index++
+            }
+        }
+
+        count++
+    }
+
+    return count
+}
+
+/**
+ * Returns the index in [text] that is [codePointOffset] code points away from [index].
+ *
+ * A positive [codePointOffset] moves towards the end of [text], a negative one towards its start. A properly paired
+ * high/low surrogate sequence is stepped over as one code point, any other char as one code point on its own.
+ *
+ * @throws IndexOutOfBoundsException if [index] is not in `0..text.length` or if there are fewer than the requested
+ * number of code points between [index] and the respective end of [text].
+ */
+internal fun offsetByCodePoints(text: String, index: Int, codePointOffset: Int): Int {
+    if (index !in 0..text.length) throw IndexOutOfBoundsException()
+    if (codePointOffset == 0) return index
+
+    if (codePointOffset > 0) {
+        var currentIndex = index
+        repeat(codePointOffset) {
+            if (currentIndex > text.lastIndex) throw IndexOutOfBoundsException()
+            val firstChar = text[currentIndex]
+            currentIndex++
+            if (isHighSurrogate(firstChar) && currentIndex <= text.lastIndex) {
+                val nextChar = text[currentIndex]
+                if (isLowSurrogate(nextChar)) {
+                    currentIndex++
+                }
+            }
+        }
+
+        return currentIndex
+    } else {
+        var currentIndex = index - 1
+        repeat(-codePointOffset) {
+            if (currentIndex < 0) throw IndexOutOfBoundsException()
+            val firstChar = text[currentIndex]
+            currentIndex--
+            if (isLowSurrogate(firstChar) && currentIndex >= 0) {
+                val previousChar = text[currentIndex]
+                if (isHighSurrogate(previousChar)) {
+                    currentIndex--
+                }
+            }
+        }
+
+        return currentIndex + 1
+    }
+}
diff --git a/src/jvmTest/kotlin/PlatformComparisonTest.kt b/src/jvmTest/kotlin/PlatformComparisonTest.kt
index 7c9e4e8..9dc5d30 100644
--- a/src/jvmTest/kotlin/PlatformComparisonTest.kt
+++ b/src/jvmTest/kotlin/PlatformComparisonTest.kt
@@ -2,6 +2,11 @@
 
 package de.cketti.codepoints
 
+import de.cketti.codepoints.codePointAt as commonCodePointAt
+import de.cketti.codepoints.codePointBefore as commonCodePointBefore
+import de.cketti.codepoints.codePointCount as commonCodePointCount
+import de.cketti.codepoints.offsetByCodePoints as commonOffsetByCodePoints
+
 import org.junit.jupiter.api.Test
 import kotlin.test.assertEquals
 
@@ -11,8 +16,8 @@ class PlatformComparisonTest {
         val text = "a\uD83E\uDD95\uD83E\uDD96b\uDD96\uD83Ec\uD83Ed\uDD96e\uD83E"
 
         for (index in text.indices) {
-            val jvmCodePointAt = (text as java.lang.String).codePointAt(index)
-            val commonCodePointAt = CommonStringFunctions.codePointAt(text, index)
+            val jvmCodePointAt = text.codePointAt(index)
+            val commonCodePointAt = text.commonCodePointAt(index)
             assertEquals(jvmCodePointAt, commonCodePointAt, "codePointAt($index)")
         }
     }
@@ -22,8 +27,8 @@ class PlatformComparisonTest {
         val text = "\uDD95a\uD83E\uDD95\uD83E\uDD96b\uDD96\uD83Ec\uD83Ed\uDD96"
 
         for (index in 1..text.length) {
-            val jvmCodePointBefore = (text as java.lang.String).codePointBefore(index)
-            val commonCodePointBefore = CommonStringFunctions.codePointBefore(text, index)
+            val jvmCodePointBefore = text.codePointBefore(index)
+            val commonCodePointBefore = text.commonCodePointBefore(index)
             assertEquals(jvmCodePointBefore, commonCodePointBefore, "codePointBefore($index)")
         }
     }
@@ -34,8 +39,8 @@ class PlatformComparisonTest {
 
         for (beginIndex in 0 until text.lastIndex) {
             for (endIndex in (beginIndex + 1)..text.lastIndex) {
-                val jvmCodePointCount = (text as java.lang.String).codePointCount(beginIndex, endIndex)
-                val commonCodePointCount = CommonStringFunctions.codePointCount(text, beginIndex, endIndex)
+                val jvmCodePointCount = text.codePointCount(beginIndex, endIndex)
+                val commonCodePointCount = text.commonCodePointCount(beginIndex, endIndex)
                 assertEquals(jvmCodePointCount, commonCodePointCount, "codePointCount($beginIndex, $endIndex)")
             }
         }
@@ -48,13 +53,13 @@ class PlatformComparisonTest {
         for (index in 0 until text.lastIndex) {
             for (codePointOffset in 0..(text.length - index)) {
                 val jvmResult = try {
-                    Result.success((text as java.lang.String).offsetByCodePoints(index, codePointOffset))
+                    Result.success(text.offsetByCodePoints(index, codePointOffset))
                 } catch (e: Exception) {
                     Result.failure(e)
                 }
 
                 val commonResult = try {
-                    Result.success(CommonStringFunctions.offsetByCodePoints(text, index, codePointOffset))
+                    Result.success(text.commonOffsetByCodePoints(index, codePointOffset))
                 } catch (e: Exception) {
                     Result.failure(e)
                 }
@@ -69,13 +74,13 @@ class PlatformComparisonTest {
         for (index in text.length downTo 1) {
             for (codePointOffset in 0 downTo -(text.length - index)) {
                 val jvmResult = try {
-                    Result.success((text as java.lang.String).offsetByCodePoints(index, codePointOffset))
+                    Result.success(text.offsetByCodePoints(index, codePointOffset))
                 } catch (e: Exception) {
                     Result.failure(e)
                 }
 
                 val commonResult = try {
-                    Result.success(CommonStringFunctions.offsetByCodePoints(text, index, codePointOffset))
+                    Result.success(text.commonOffsetByCodePoints(index, codePointOffset))
                 } catch (e: Exception) {
                     Result.failure(e)
                 }