Skip to content

Commit

Permalink
Add kotlin-codepoints-deluxe
Browse files Browse the repository at this point in the history
Uses the basic functionality in `kotlin-codepoints` to provide a nicer API to work with Unicode code points.
  • Loading branch information
cketti committed Jan 30, 2023
1 parent 39abf06 commit 1aaf04a
Show file tree
Hide file tree
Showing 8 changed files with 549 additions and 0 deletions.
79 changes: 79 additions & 0 deletions kotlin-codepoints-deluxe/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Gradle plugins applied to this module. Both are referenced through `libs` plugin aliases
// rather than by hard-coded id and version.
plugins {
// Kotlin Multiplatform plugin; provides the `kotlin { ... }` block used in this script.
alias(libs.plugins.kotlin.multiplatform)
// Maven publish plugin; provides the `mavenPublishing { ... }` block used in this script.
alias(libs.plugins.vanniktech.maven.publish)
}

// Kotlin Multiplatform configuration: declares every target this module is built for,
// followed by the dependencies of the shared (common) source sets.
kotlin {
// Android native targets.
androidNativeArm32()
androidNativeArm64()
androidNativeX86()
androidNativeX64()

// iOS device and simulator targets.
iosArm32()
iosArm64()
iosX64()
iosSimulatorArm64()

// JavaScript target using the IR compiler, with the browser environment enabled.
js(IR) {
browser {}
}

// JVM target; every compilation is set to the "1.8" JVM target.
jvm {
compilations.all {
kotlinOptions.jvmTarget = "1.8"
}
}

// Linux targets. The two MIPS targets are also the ones published by the `publishMips` task below.
linuxArm32Hfp()
linuxArm64()
linuxMips32()
linuxMipsel32()
linuxX64()

// macOS targets.
macosX64()
macosArm64()

// Windows (MinGW) targets.
mingwX64()
mingwX86()

// tvOS device and simulator targets.
tvosArm64()
tvosX64()
tvosSimulatorArm64()

// WebAssembly (32-bit) target.
wasm32()

// watchOS device and simulator targets.
watchosArm32()
watchosArm64()
watchosDeviceArm64()
watchosX86()
watchosX64()
watchosSimulatorArm64()

sourceSets {
// Shared production code. `api` is used so that consumers of this module also see
// the `:kotlin-codepoints` project on their compile classpath.
val commonMain by getting {
dependencies {
api(project(":kotlin-codepoints"))
}
}
// Shared test code only needs the Kotlin test library.
val commonTest by getting {
dependencies {
implementation(kotlin("test"))
}
}
}
}

// POM metadata for the published artifact.
// The name matches this module's directory (`kotlin-codepoints-deluxe`), and the description
// refers to the `:kotlin-codepoints` project that this module depends on.
@Suppress("UnstableApiUsage")
mavenPublishing {
    pom {
        name.set("kotlin-codepoints-deluxe")
        description.set("Kotlin Multiplatform (KMP) library that adds a nicer API than kotlin-codepoints for dealing with Unicode code points.")
    }
}

// Aggregate task that publishes only the two Linux MIPS publications.
// Registered lazily so the task is configured only when it is actually needed.
tasks.register("publishMips") {
    dependsOn(
        "publishLinuxMips32PublicationToMavenCentralRepository",
        "publishLinuxMipsel32PublicationToMavenCentralRepository",
    )
}
129 changes: 129 additions & 0 deletions kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePoint.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package de.cketti.codepoints.deluxe

import de.cketti.codepoints.CodePoints
import kotlin.jvm.JvmInline

/**
 * A single Unicode code point, wrapping its integer [value].
 *
 * The constructor is internal. Instances are obtained through one of the following functions:
 * - [Int.toCodePoint]
 * - [Char.toCodePoint]
 * - [String.codePointSequence]
 * - [String.codePointIterator]
 *
 * Construction throws [IllegalArgumentException] when [CodePoints.isValidCodePoint] rejects the value.
 */
@JvmInline
value class CodePoint internal constructor(val value: Int) {
    init {
        require(CodePoints.isValidCodePoint(value)) { "Not a valid code point" }
    }

    /**
     * Whether this code point lies in the Basic Multilingual Plane (BMP).
     *
     * A BMP code point fits into a single `Char`.
     */
    val isBasic: Boolean get() = CodePoints.isBmpCodePoint(value)

    /**
     * Whether this code point lies in the supplementary character range.
     *
     * Inside a `String` such a code point occupies a surrogate pair, i.e. two `Char` values.
     */
    val isSupplementary: Boolean get() = CodePoints.isSupplementaryCodePoint(value)

    /**
     * How many `Char` values are required to represent this code point.
     *
     * `1` when this code point is in the [BMP][CodePoint.isBasic], `2` otherwise.
     */
    val charCount: Int get() = CodePoints.charCount(value)

    /**
     * Whether this code point is a surrogate code unit.
     */
    val isSurrogate: Boolean
        get() = if (isSupplementary) false else value.toChar().isSurrogate()

    /**
     * Whether this code point is a high surrogate code unit.
     */
    val isHighSurrogate: Boolean
        get() = if (isSupplementary) false else value.toChar().isHighSurrogate()

    /**
     * Whether this code point is a low surrogate code unit.
     */
    val isLowSurrogate: Boolean
        get() = if (isSupplementary) false else value.toChar().isLowSurrogate()

    /**
     * The leading (high) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary the returned `Char` is unspecified.
     */
    val highSurrogateChar: Char get() = CodePoints.highSurrogate(value)

    /**
     * The trailing (low) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary the returned `Char` is unspecified.
     */
    val lowSurrogateChar: Char get() = CodePoints.lowSurrogate(value)

    /**
     * Returns the UTF-16 representation of this code point as a new char array.
     *
     * A BMP code point yields an array holding the same value as [value]; a supplementary code point yields
     * the corresponding surrogate pair.
     */
    fun toChars(): CharArray = CodePoints.toChars(value)

    /**
     * Writes the UTF-16 representation of this code point into [destination], starting at [offset].
     *
     * A BMP code point is stored in `destination[offset]` and 1 is returned. A supplementary code point is
     * stored as a high surrogate in `destination[offset]` and a low surrogate in `destination[offset+1]`, and 2
     * is returned.
     */
    fun toChars(destination: CharArray, offset: Int): Int = CodePoints.toChars(value, destination, offset)

    /**
     * Formats this code point as "U+" followed by its value in upper-case hexadecimal, left-padded with zeros
     * to at least four digits.
     */
    override fun toString(): String {
        val hex = value.toString(radix = 16).uppercase()
        return "U+${hex.padStart(length = 4, padChar = '0')}"
    }
}

/**
 * Wraps this integer in a [CodePoint].
 *
 * @throws IllegalArgumentException if this value is not a valid code point.
 */
fun Int.toCodePoint(): CodePoint = CodePoint(this)

/**
 * Wraps the numeric code of this `Char` in a [CodePoint].
 */
fun Char.toCodePoint(): CodePoint = CodePoint(code)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package de.cketti.codepoints.deluxe

import kotlin.jvm.JvmInline

/**
 * A [Sequence] view over the [CodePoint]s of a [String].
 *
 * Every call to [iterator] starts a fresh [CodePointIterator] at the beginning of the string.
 */
@JvmInline
value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
    override fun iterator(): CodePointIterator = CodePointIterator(text)
}

/**
 * Walks the [CodePoint]s of a [String] from start to end.
 *
 * The iterator tracks a `Char` index into the string and advances it by the
 * [charCount][CodePoint.charCount] of each code point it returns.
 *
 * Calling [next] after the end of the string has been reached does not perform its own bounds check; the
 * failure comes from the underlying `codePointAt` lookup on the out-of-range index.
 */
class CodePointIterator(private val text: String) : Iterator<CodePoint> {
    private var index = 0

    override fun hasNext(): Boolean = text.length > index

    override fun next(): CodePoint {
        val current = text.codePointAt(index)
        // Only advance once the lookup has succeeded, so a failed call leaves the position untouched.
        index += current.charCount
        return current
    }
}
50 changes: 50 additions & 0 deletions kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
@file:Suppress(
"INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers
"INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers
)
package de.cketti.codepoints.deluxe

import de.cketti.codepoints.codePointAt as intCodePointAt
import de.cketti.codepoints.codePointBefore as intCodePointBefore

/**
 * Returns the Unicode code point located at [index], wrapped in a [CodePoint].
 *
 * [index] is a regular `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * Throws [IndexOutOfBoundsException] if [index] is out of bounds of this string.
 *
 * See [codePointAt][intCodePointAt].
 */
@kotlin.internal.HidesMembers
fun String.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()

/**
 * Returns the Unicode code point located immediately before [index], wrapped in a [CodePoint].
 *
 * [index] is a regular `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * Throws [IndexOutOfBoundsException] if `index - 1` is out of bounds of this string.
 *
 * See [codePointBefore][intCodePointBefore].
 */
fun String.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()

/**
 * Returns a [CodePointSequence] over the [CodePoint]s of this string.
 */
fun String.codePointSequence(): CodePointSequence = CodePointSequence(this)

/**
 * Returns a [CodePointIterator] positioned at the start of this string.
 */
fun String.codePointIterator(): CodePointIterator = CodePointIterator(this)
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package de.cketti.codepoints.deluxe

import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlin.test.assertFalse
import kotlin.test.assertTrue

/**
 * Tests for the `String.codePointSequence()` and `String.codePointIterator()` extensions.
 */
class CodePointSequenceTest {
    /** A string mixing single-`Char` and surrogate-pair characters yields one value per code point. */
    @Test
    fun codePointSequence() {
        val input = "a\uD83E\uDD95b\uD83E\uDD96c"
        val expected = listOf(0x0061, 0x1F995, 0x0062, 0x1F996, 0x0063)

        val actual = input.codePointSequence().map { codePoint -> codePoint.value }.toList()

        assertEquals(expected, actual)
    }

    /** The iterator returns each code point in order and fails once it has been exhausted. */
    @Test
    fun codePointIterator() {
        val codePoints = "a\uD83E\uDD95b".codePointIterator()
        val expectedInOrder = listOf(
            'a'.toCodePoint(),
            "\uD83E\uDD95".codePointAt(0),
            'b'.toCodePoint(),
        )

        for (expected in expectedInOrder) {
            assertTrue(codePoints.hasNext())
            assertEquals(expected, codePoints.next())
        }

        assertFalse(codePoints.hasNext())
        assertFailsWith<IndexOutOfBoundsException> {
            codePoints.next()
        }
    }
}
Loading

0 comments on commit 1aaf04a

Please sign in to comment.