-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Uses the basic functionality in `kotlin-codepoints` to provide a nicer API to work with Unicode code points.
- Loading branch information
Showing
8 changed files
with
549 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
plugins {
    alias(libs.plugins.kotlin.multiplatform)
    alias(libs.plugins.vanniktech.maven.publish)
}

kotlin {
    // Android Native
    androidNativeArm32()
    androidNativeArm64()
    androidNativeX86()
    androidNativeX64()

    // iOS
    iosArm32()
    iosArm64()
    iosX64()
    iosSimulatorArm64()

    // JavaScript (IR compiler), browser only
    js(IR) {
        browser {}
    }

    // JVM, targeting Java 8 bytecode
    jvm {
        compilations.all {
            kotlinOptions.jvmTarget = "1.8"
        }
    }

    // Linux
    linuxArm32Hfp()
    linuxArm64()
    linuxMips32()
    linuxMipsel32()
    linuxX64()

    // macOS
    macosX64()
    macosArm64()

    // Windows (MinGW)
    mingwX64()
    mingwX86()

    // tvOS
    tvosArm64()
    tvosX64()
    tvosSimulatorArm64()

    // WebAssembly
    wasm32()

    // watchOS
    watchosArm32()
    watchosArm64()
    watchosDeviceArm64()
    watchosX86()
    watchosX64()
    watchosSimulatorArm64()

    sourceSets {
        val commonMain by getting {
            dependencies {
                // `api` rather than `implementation`: the dependency is exported to consumers of this module.
                api(project(":kotlin-codepoints"))
            }
        }
        val commonTest by getting {
            dependencies {
                implementation(kotlin("test"))
            }
        }
    }
}

@Suppress("UnstableApiUsage")
mavenPublishing {
    pom {
        // Spelled with the plural "codepoints" to match the module directory and the project this module builds on.
        name.set("kotlin-codepoints-deluxe")
        description.set("Kotlin Multiplatform (KMP) library that adds a nicer API than kotlin-codepoints for dealing with Unicode code points.")
    }
}

// Aggregate task that publishes only the two Linux MIPS publications to Maven Central.
// Registered lazily so the task is only created and configured when it is actually needed.
tasks.register("publishMips") {
    dependsOn(
        "publishLinuxMips32PublicationToMavenCentralRepository",
        "publishLinuxMipsel32PublicationToMavenCentralRepository",
    )
}
129 changes: 129 additions & 0 deletions
129
kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePoint.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package de.cketti.codepoints.deluxe | ||
|
||
import de.cketti.codepoints.CodePoints | ||
import kotlin.jvm.JvmInline | ||
|
||
/**
 * A single Unicode code point, wrapped in its own type so it cannot be mixed up with an arbitrary [Int].
 *
 * The constructor is internal. Instances are obtained through one of these functions:
 * - [Int.toCodePoint]
 * - [Char.toCodePoint]
 * - [String.codePointSequence]
 * - [String.codePointIterator]
 *
 * Construction fails with an [IllegalArgumentException] when [value] is not a valid code point.
 *
 * @property value The numeric value of this code point.
 */
@JvmInline
value class CodePoint internal constructor(val value: Int) {
    init {
        require(CodePoints.isValidCodePoint(value)) { "Not a valid code point" }
    }

    /**
     * Whether this code point lies in the Basic Multilingual Plane (BMP).
     *
     * A BMP code point fits into a single `Char`.
     */
    val isBasic: Boolean
        get() = CodePoints.isBmpCodePoint(value)

    /**
     * Whether this code point lies in the supplementary character range.
     *
     * Inside a `String` a supplementary code point is stored as a surrogate pair, i.e. as two `Char` values.
     */
    val isSupplementary: Boolean
        get() = CodePoints.isSupplementaryCodePoint(value)

    /**
     * How many `Char` values are required to represent this code point.
     *
     * The result is `1` for a code point in the [BMP][CodePoint.isBasic] and `2` otherwise.
     */
    val charCount: Int
        get() = CodePoints.charCount(value)

    /**
     * Whether this code point is a surrogate code unit.
     */
    val isSurrogate: Boolean
        get() = matchesAsSingleChar { it.isSurrogate() }

    /**
     * Whether this code point is a high surrogate code unit.
     */
    val isHighSurrogate: Boolean
        get() = matchesAsSingleChar { it.isHighSurrogate() }

    /**
     * Whether this code point is a low surrogate code unit.
     */
    val isLowSurrogate: Boolean
        get() = matchesAsSingleChar { it.isLowSurrogate() }

    /**
     * The high (leading) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary the returned `Char` is unspecified.
     */
    val highSurrogateChar: Char
        get() = CodePoints.highSurrogate(value)

    /**
     * The low (trailing) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary the returned `Char` is unspecified.
     */
    val lowSurrogateChar: Char
        get() = CodePoints.lowSurrogate(value)

    /**
     * Returns the UTF-16 representation of this code point as a new char array.
     *
     * A BMP (Basic Multilingual Plane, Plane 0) code point yields an array holding the same value as [value];
     * a supplementary code point yields an array holding the corresponding surrogate pair.
     */
    fun toChars(): CharArray = CodePoints.toChars(value)

    /**
     * Writes the UTF-16 representation of this code point into [destination], starting at [offset].
     *
     * A BMP (Basic Multilingual Plane, Plane 0) code point is stored in `destination[offset]` and `1` is returned.
     * A supplementary code point is stored as its high surrogate in `destination[offset]` followed by its low
     * surrogate in `destination[offset+1]`, and `2` is returned.
     *
     * @return The number of `Char`s written.
     */
    fun toChars(destination: CharArray, offset: Int): Int = CodePoints.toChars(value, destination, offset)

    /**
     * Formats this code point as "U+" followed by its value in upper-case hexadecimal, left-padded with zeros to at
     * least four digits.
     */
    override fun toString(): String {
        val hex = value.toString(radix = 16).uppercase()
        return "U+${hex.padStart(length = 4, padChar = '0')}"
    }

    /**
     * Applies [predicate] to this code point converted to a `Char`, unless the code point is supplementary, in which
     * case the result is `false` and [predicate] is not invoked.
     */
    private inline fun matchesAsSingleChar(predicate: (Char) -> Boolean): Boolean {
        return !isSupplementary && predicate(value.toChar())
    }
}
|
||
/**
 * Wraps this `Int` in a [CodePoint].
 *
 * @throws IllegalArgumentException if this value is outside the range of valid code points.
 */
fun Int.toCodePoint(): CodePoint = CodePoint(this)
|
||
/**
 * Wraps this `Char` in a [CodePoint] that has the same numeric value.
 */
fun Char.toCodePoint(): CodePoint = CodePoint(code)
30 changes: 30 additions & 0 deletions
30
kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package de.cketti.codepoints.deluxe | ||
|
||
import kotlin.jvm.JvmInline | ||
|
||
/**
 * A [Sequence] view over the [CodePoint]s of a [String].
 *
 * Every call to [iterator] creates a fresh [CodePointIterator] for the wrapped string.
 */
@JvmInline
value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
    override fun iterator(): CodePointIterator = CodePointIterator(text)
}
|
||
/**
 * Walks over the [CodePoint]s of a [String] from its first `Char` to its last.
 */
class CodePointIterator(private val text: String) : Iterator<CodePoint> {
    /** `Char` index in [text] at which the next code point starts. */
    private var index = 0

    /** `true` while [index] has not yet reached the end of the text. */
    override fun hasNext(): Boolean = index < text.length

    /**
     * Returns the code point starting at the current position and moves past all `Char`s that belong to it.
     *
     * Once the iterator is exhausted, the underlying `codePointAt` lookup is out of bounds and fails with an
     * [IndexOutOfBoundsException].
     */
    override fun next(): CodePoint {
        val current = text.codePointAt(index)
        // Advance by one or two `Char`s, depending on how many the code point occupies.
        index += current.charCount
        return current
    }
}
50 changes: 50 additions & 0 deletions
50
kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
@file:Suppress( | ||
"INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers | ||
"INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers | ||
) | ||
package de.cketti.codepoints.deluxe | ||
|
||
import de.cketti.codepoints.codePointAt as intCodePointAt | ||
import de.cketti.codepoints.codePointBefore as intCodePointBefore | ||
|
||
/**
 * Looks up the Unicode code point located at [index] and returns it as a [CodePoint].
 *
 * [index] is an ordinary `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * This is the [CodePoint]-returning counterpart of [codePointAt][intCodePointAt].
 *
 * @throws IndexOutOfBoundsException if [index] is out of bounds of this string.
 */
@kotlin.internal.HidesMembers
fun String.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
|
||
/**
 * Looks up the Unicode code point that precedes [index] and returns it as a [CodePoint].
 *
 * [index] is an ordinary `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * This is the [CodePoint]-returning counterpart of [codePointBefore][intCodePointBefore].
 *
 * @throws IndexOutOfBoundsException if `index - 1` is out of bounds of this string.
 */
fun String.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
|
||
/**
 * Returns a [CodePointSequence] over the [CodePoint]s of this string.
 */
fun String.codePointSequence(): CodePointSequence = CodePointSequence(this)
|
||
/**
 * Returns a new [CodePointIterator] over the [CodePoint]s of this string.
 */
fun String.codePointIterator(): CodePointIterator = CodePointIterator(this)
32 changes: 32 additions & 0 deletions
32
kotlin-codepoints-deluxe/src/commonTest/kotlin/CodePointSequenceTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package de.cketti.codepoints.deluxe | ||
|
||
import kotlin.test.Test | ||
import kotlin.test.assertEquals | ||
import kotlin.test.assertFailsWith | ||
import kotlin.test.assertFalse | ||
import kotlin.test.assertTrue | ||
|
||
/**
 * Tests for iterating over the code points of a string via `codePointSequence()` and `codePointIterator()`.
 */
class CodePointSequenceTest {
    /** A string mixing BMP characters and surrogate pairs yields one value per code point. */
    @Test
    fun codePointSequence() {
        val text = "a\uD83E\uDD95b\uD83E\uDD96c"
        val expected = listOf(0x0061, 0x1F995, 0x0062, 0x1F996, 0x0063)

        val actual = text.codePointSequence().map { it.value }.toList()

        assertEquals(expected, actual)
    }

    /** The iterator returns each code point once and fails when asked for more than the string contains. */
    @Test
    fun codePointIterator() {
        val codePoints = "a\uD83E\uDD95b".codePointIterator()

        assertTrue(codePoints.hasNext())
        assertEquals('a'.toCodePoint(), codePoints.next())

        // The surrogate pair must come back as a single code point.
        assertTrue(codePoints.hasNext())
        assertEquals("\uD83E\uDD95".codePointAt(0), codePoints.next())

        assertTrue(codePoints.hasNext())
        assertEquals('b'.toCodePoint(), codePoints.next())

        assertFalse(codePoints.hasNext())
        assertFailsWith<IndexOutOfBoundsException> {
            codePoints.next()
        }
    }
}
Oops, something went wrong.