Add kotlin-codepoints-deluxe

Uses the basic functionality in `kotlin-codepoints` to provide a nicer API to work with Unicode code points.
cketti · Jan 30, 2023 · 1aaf04a · 1aaf04a
1 parent 39abf06
commit 1aaf04a
Show file tree

Hide file tree

Showing 8 changed files with 549 additions and 0 deletions.
diff --git a/kotlin-codepoints-deluxe/build.gradle.kts b/kotlin-codepoints-deluxe/build.gradle.kts
@@ -0,0 +1,79 @@
+plugins { // Plugins are applied via aliases looked up on the `libs` accessor.
+    alias(libs.plugins.kotlin.multiplatform) // enables the `kotlin { ... }` multiplatform block below
+    alias(libs.plugins.vanniktech.maven.publish) // presumably provides the `mavenPublishing { ... }` block below - confirm
+}
+
+kotlin { // Declares every compilation target of this module plus the dependencies of its common source sets.
+    androidNativeArm32()
+    androidNativeArm64()
+    androidNativeX86()
+    androidNativeX64()
+
+    iosArm32()
+    iosArm64()
+    iosX64()
+    iosSimulatorArm64()
+
+    js(IR) { // JS target using the IR compiler; only the browser environment is configured
+        browser {}
+    }
+
+    jvm {
+        compilations.all {
+            kotlinOptions.jvmTarget = "1.8" // JVM bytecode target for all JVM compilations
+        }
+    }
+
+    linuxArm32Hfp()
+    linuxArm64()
+    linuxMips32()
+    linuxMipsel32()
+    linuxX64()
+
+    macosX64()
+    macosArm64()
+
+    mingwX64()
+    mingwX86()
+
+    tvosArm64()
+    tvosX64()
+    tvosSimulatorArm64()
+
+    wasm32()
+
+    watchosArm32()
+    watchosArm64()
+    watchosDeviceArm64()
+    watchosX86()
+    watchosX64()
+    watchosSimulatorArm64()
+
+    sourceSets {
+        val commonMain by getting {
+            dependencies {
+                api(project(":kotlin-codepoints")) // `api` scope: the base module is exposed to consumers of this one
+            }
+        }
+        val commonTest by getting {
+            dependencies {
+                implementation(kotlin("test")) // kotlin.test, used by the tests in commonTest
+            }
+        }
+    }
+}
+
+// POM metadata for the published artifact. The name matches this module's directory
+// (`kotlin-codepoints-deluxe`) and the description refers to the `kotlin-codepoints`
+// project this module builds on.
+@Suppress("UnstableApiUsage")
+mavenPublishing {
+    pom {
+        name.set("kotlin-codepoints-deluxe")
+        description.set("Kotlin Multiplatform (KMP) library that adds a nicer API than kotlin-codepoints for dealing with Unicode code points.")
+    }
+}
+
+// Aggregate task that depends on the publish tasks of the two Linux MIPS publications.
+// Registered lazily (`register` instead of `create`) so the task is only configured when it is
+// actually needed; the dependencies are given by name and are resolved by Gradle later.
+tasks.register("publishMips") {
+    dependsOn(
+        "publishLinuxMips32PublicationToMavenCentralRepository",
+        "publishLinuxMipsel32PublicationToMavenCentralRepository"
+    )
+}
diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePoint.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePoint.kt
@@ -0,0 +1,129 @@
+package de.cketti.codepoints.deluxe
+
+import de.cketti.codepoints.CodePoints
+import kotlin.jvm.JvmInline
+
+/**
+ * A single Unicode code point, wrapped as a type-safe value.
+ *
+ * The constructor is internal, so instances are obtained through one of the following functions:
+ * - [Int.toCodePoint]
+ * - [Char.toCodePoint]
+ * - [String.codePointSequence]
+ * - [String.codePointIterator]
+ *
+ * @property value The numeric value of this code point.
+ */
+@JvmInline
+value class CodePoint internal constructor(val value: Int) {
+    init {
+        // Construction fails with IllegalArgumentException for anything `CodePoints` does not consider valid.
+        require(CodePoints.isValidCodePoint(value)) { "Not a valid code point" }
+    }
+
+    /**
+     * Whether this code point lies in the Basic Multilingual Plane (BMP).
+     *
+     * A BMP code point fits into a single `Char`.
+     */
+    val isBasic: Boolean get() = CodePoints.isBmpCodePoint(value)
+
+    /**
+     * Whether this code point lies in the supplementary character range.
+     *
+     * Inside a `String` such a code point takes up a surrogate pair, i.e. two `Char` values.
+     */
+    val isSupplementary: Boolean get() = CodePoints.isSupplementaryCodePoint(value)
+
+    /**
+     * How many `Char` values are required to represent this code point.
+     *
+     * `1` for a code point in the [BMP][CodePoint.isBasic], `2` otherwise.
+     */
+    val charCount: Int get() = CodePoints.charCount(value)
+
+    /**
+     * Whether this code point is a surrogate code unit.
+     */
+    val isSurrogate: Boolean get() = !isSupplementary && value.toChar().isSurrogate()
+
+    /**
+     * Whether this code point is a high surrogate code unit.
+     */
+    val isHighSurrogate: Boolean get() = !isSupplementary && value.toChar().isHighSurrogate()
+
+    /**
+     * Whether this code point is a low surrogate code unit.
+     */
+    val isLowSurrogate: Boolean get() = !isSupplementary && value.toChar().isLowSurrogate()
+
+    /**
+     * The high (leading) surrogate of the surrogate pair that represents this supplementary code point.
+     *
+     * For a code point that is not a supplementary character the returned `Char` is unspecified.
+     */
+    val highSurrogateChar: Char get() = CodePoints.highSurrogate(value)
+
+    /**
+     * The low (trailing) surrogate of the surrogate pair that represents this supplementary code point.
+     *
+     * For a code point that is not a supplementary character the returned `Char` is unspecified.
+     */
+    val lowSurrogateChar: Char get() = CodePoints.lowSurrogate(value)
+
+    /**
+     * Returns the UTF-16 representation of this code point as a new char array.
+     *
+     * A BMP (Basic Multilingual Plane or Plane 0) code point yields an array holding the same value as [value];
+     * a supplementary code point yields the corresponding surrogate pair.
+     */
+    fun toChars(): CharArray = CodePoints.toChars(value)
+
+    /**
+     * Writes the UTF-16 representation of this code point into [destination], starting at [offset].
+     *
+     * A BMP (Basic Multilingual Plane or Plane 0) code point is stored in `destination[offset]` and 1 is returned.
+     * A supplementary code point is stored as its high surrogate in `destination[offset]` followed by its low
+     * surrogate in `destination[offset+1]`, and 2 is returned.
+     *
+     * @return The number of `Char`s written.
+     */
+    fun toChars(destination: CharArray, offset: Int): Int = CodePoints.toChars(value, destination, offset)
+
+    /**
+     * Formats this code point as "U+" followed by its value in upper case hexadecimal, left-padded with zeros
+     * to at least four digits.
+     */
+    override fun toString(): String {
+        val hex = value.toString(radix = 16).uppercase()
+        return "U+" + hex.padStart(length = 4, padChar = '0')
+    }
+}
+
+/**
+ * Wraps this `Int` in a [CodePoint] of the same value.
+ *
+ * @throws IllegalArgumentException if this value is outside the range of valid code points.
+ */
+fun Int.toCodePoint(): CodePoint = CodePoint(this)
+
+/**
+ * Converts this `Char` to the [CodePoint] whose value equals the char's [code][Char.code].
+ */
+fun Char.toCodePoint(): CodePoint = CodePoint(code)
diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt
@@ -0,0 +1,30 @@
+package de.cketti.codepoints.deluxe
+
+import kotlin.jvm.JvmInline
+
+/**
+ * A [Sequence] yielding the [CodePoint]s of the wrapped [String].
+ *
+ * Every call to [iterator] creates a new [CodePointIterator] over the string.
+ */
+@JvmInline
+value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
+    override fun iterator(): CodePointIterator = CodePointIterator(text)
+}
+
+/**
+ * Walks over the [CodePoint]s of the given [String] from its start to its end.
+ *
+ * The iterator keeps a `Char` index into the string and advances it by [CodePoint.charCount] after each element.
+ */
+class CodePointIterator(private val text: String) : Iterator<CodePoint> {
+    // Char index of the code point that the next call to next() returns.
+    private var index = 0
+
+    override fun hasNext(): Boolean = index < text.length
+
+    /**
+     * Returns the code point at the current position and moves past it.
+     *
+     * The element is looked up with [String.codePointAt], so calling this after the string is exhausted fails
+     * with an [IndexOutOfBoundsException].
+     */
+    override fun next(): CodePoint {
+        val current = text.codePointAt(index)
+        index += current.charCount
+        return current
+    }
+}
diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt
@@ -0,0 +1,50 @@
+@file:Suppress(
+    "INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers
+    "INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers
+)
+package de.cketti.codepoints.deluxe
+
+import de.cketti.codepoints.codePointAt as intCodePointAt
+import de.cketti.codepoints.codePointBefore as intCodePointBefore
+
+/**
+ * Returns the [CodePoint] located at the given index of this string.
+ *
+ * `index` is a regular `String` index, i.e. it counts `Char`s from the start of the string.
+ *
+ * This is the [CodePoint]-returning counterpart of [codePointAt][intCodePointAt], to which it delegates.
+ *
+ * @throws IndexOutOfBoundsException if `index` is out of bounds of this string.
+ */
+@kotlin.internal.HidesMembers
+fun String.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
+
+/**
+ * Returns the [CodePoint] located directly before the given index of this string.
+ *
+ * `index` is a regular `String` index, i.e. it counts `Char`s from the start of the string.
+ *
+ * This is the [CodePoint]-returning counterpart of [codePointBefore][intCodePointBefore], to which it delegates.
+ *
+ * @throws IndexOutOfBoundsException if `index - 1` is out of bounds of this string.
+ */
+fun String.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
+
+/**
+ * Returns a [CodePointSequence] over the [CodePoint]s of this string.
+ */
+fun String.codePointSequence(): CodePointSequence = CodePointSequence(this)
+
+/**
+ * Returns a new [CodePointIterator] over the [CodePoint]s of this string.
+ */
+fun String.codePointIterator(): CodePointIterator = CodePointIterator(this)
diff --git a/kotlin-codepoints-deluxe/src/commonTest/kotlin/CodePointSequenceTest.kt b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CodePointSequenceTest.kt
@@ -0,0 +1,32 @@
+package de.cketti.codepoints.deluxe
+
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFailsWith
+import kotlin.test.assertFalse
+import kotlin.test.assertTrue
+
+/**
+ * Tests for [codePointSequence] and [codePointIterator] using strings that mix BMP characters with
+ * surrogate pairs.
+ */
+class CodePointSequenceTest {
+    @Test
+    fun codePointSequence() {
+        val expected = listOf(0x0061, 0x1F995, 0x0062, 0x1F996, 0x0063)
+
+        val actual = "a\uD83E\uDD95b\uD83E\uDD96c".codePointSequence()
+            .map { codePoint -> codePoint.value }
+            .toList()
+
+        assertEquals(expected, actual)
+    }
+
+    @Test
+    fun codePointIterator() {
+        val iterator = "a\uD83E\uDD95b".codePointIterator()
+        val expectedCodePoints = listOf(
+            'a'.toCodePoint(),
+            "\uD83E\uDD95".codePointAt(0),
+            'b'.toCodePoint()
+        )
+
+        // Each expected element must be announced by hasNext() and then returned by next().
+        for (expected in expectedCodePoints) {
+            assertTrue(iterator.hasNext())
+            assertEquals(expected, iterator.next())
+        }
+
+        // An exhausted iterator reports no further elements and next() throws.
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
+}