Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ksoup-lite variant #77

Merged
merged 6 commits into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ jobs:
matrix:
buildType:
- "common"
- "lite"
- "kotlinx"
- "korlibs"
- "ktor2"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
{ target: windows, os: windows-latest, tasks: mingwX64Test, continueOnError: false },
{ target: linux, os: ubuntu-latest, tasks: linuxX64Test, continueOnError: false },
]
libBuildType: [ "korlibs", "kotlinx", "okio", "ktor2" ]
libBuildType: [ "lite", "korlibs", "kotlinx", "okio", "ktor2" ]
runs-on: ${{ matrix.config.os }}
name: Build ${{ matrix.config.target }} with libBuildType=${{ matrix.libBuildType }}
steps:
Expand Down
4 changes: 2 additions & 2 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ android.nonTransitiveRClass=true
kotlin.native.ignoreIncorrectDependencies=true
kotlin.mpp.enableCInteropCommonization=true
kotlin.mpp.applyDefaultHierarchyTemplate=false
# dev, common, kotlinx, korlibs, okio, ktor2
# dev, common, lite, kotlinx, korlibs, okio, ktor2
# dev will include all modules in settings.gradle.kts but use kotlinx dep for engine
libBuildType=okio
libBuildType=lite


SONATYPE_HOST=CENTRAL_PORTAL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,9 @@ package com.fleeksoft.ksoup.engine

import com.fleeksoft.ksoup.io.Charset
import com.fleeksoft.ksoup.io.FileSource
import com.fleeksoft.ksoup.io.SourceReader

interface KsoupEngine {

fun openSourceReader(content: String, charset: Charset? = null): SourceReader

fun openSourceReader(byteArray: ByteArray): SourceReader

fun getUtf8Charset(): Charset

fun charsetForName(name: String): Charset
Expand Down
2 changes: 2 additions & 0 deletions ksoup-engine-common/src/com/fleeksoft/ksoup/io/Charset.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ interface Charset {

fun decode(stringBuilder: StringBuilder, byteArray: ByteArray, start: Int, end: Int): Int
fun toByteArray(value: String): ByteArray

fun onlyUtf8(): Boolean = false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.fleeksoft.ksoup.io

/**
 * [SourceReader] backed by an in-memory [ByteArray].
 *
 * Reads advance a cursor over the array. [mark] records the cursor and [reset] rewinds to it.
 * The backing array is kept after [close] so that a later [reset] can still replay the bytes.
 */
internal class SourceReaderByteArray(bytes: ByteArray) : SourceReader {
    private val source: ByteArray = bytes
    private var currentPosition: Int = 0
    private var markedPosition: Int? = null
    private var isClosed: Boolean = false

    /** Remembers the current cursor position. [readLimit] is not used by this implementation. */
    override fun mark(readLimit: Long) {
        markedPosition = currentPosition
    }

    /**
     * Clears the closed flag and, if a mark is set, rewinds the cursor to it and discards the mark.
     * Without a mark the cursor is left where it is.
     */
    override fun reset() {
        isClosed = false
        markedPosition?.let {
            currentPosition = it
            markedPosition = null
        }
    }

    /**
     * Reads up to [count] bytes from the cursor and advances it.
     *
     * @return a new array holding the bytes read; it is shorter than [count] (possibly empty)
     * when fewer bytes remain.
     * @throws IllegalArgumentException if [count] is negative.
     */
    override fun readBytes(count: Int): ByteArray {
        require(count >= 0) { "count must not be negative: $count" }
        // Size the result by what is actually available instead of allocating `count` bytes up front.
        val available = minOf(count, source.size - currentPosition)
        if (available <= 0) return byteArrayOf()
        val result = source.copyOfRange(currentPosition, currentPosition + available)
        currentPosition += available
        return result
    }

    /**
     * Copies up to [length] bytes into [bytes], starting at index [offset], and advances the cursor.
     *
     * @return the number of bytes copied; 0 when the source is exhausted or [length] is not positive.
     */
    override fun read(bytes: ByteArray, offset: Int, length: Int): Int {
        val available = minOf(length, source.size - currentPosition)
        if (available <= 0) return 0
        source.copyInto(
            destination = bytes,
            destinationOffset = offset,
            startIndex = currentPosition,
            endIndex = currentPosition + available,
        )
        currentPosition += available
        return available
    }

    /** Reads every byte from the cursor to the end of the array. */
    override fun readAllBytes(): ByteArray {
        return readBytes(source.size - currentPosition)
    }

    /** Returns `true` once the cursor has reached the end of the array. */
    override fun exhausted(): Boolean {
        return currentPosition >= source.size
    }

    /** Marks the reader as closed. The bytes and cursor are kept because [reset] may need them again. */
    override fun close() {
        isClosed = true
    }

    /**
     * Reads up to [byteCount] bytes and writes them into [sink].
     *
     * @return the number of bytes transferred.
     */
    override fun readAtMostTo(sink: KByteBuffer, byteCount: Int): Int {
        val bytes = readBytes(byteCount)
        sink.writeBytes(bytes, bytes.size)
        return bytes.size
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package com.fleeksoft.ksoup.io

/** Creates a [SourceReader] that reads from the given in-memory [byteArray]. */
fun SourceReader.Companion.from(byteArray: ByteArray): SourceReader {
    return SourceReaderByteArray(byteArray)
}
6 changes: 3 additions & 3 deletions ksoup-engine-korlibs/src/com/fleeksoft/ksoup/EngineExt.kt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.fleeksoft.ksoup

import com.fleeksoft.ksoup.engine.KsoupEngineImpl
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.io.from
import korlibs.io.compression.deflate.GZIP
import korlibs.io.compression.uncompress
import korlibs.io.file.VfsFile
Expand All @@ -16,8 +16,8 @@ suspend fun VfsFile.openStream(): SourceReader {
val zipped =
(byteArray.size == 2 && byteArray[0].toInt() == 31 && byteArray[1].toInt() == -117) // gzip magic bytes 31(0x1f), -117(0x1f)
if (zipped) {
return KsoupEngineImpl.openSourceReader(this.readAsSyncStream().readAll().uncompress(GZIP))
return SourceReader.from(this.readAsSyncStream().readAll().uncompress(GZIP))
}
}
return KsoupEngineImpl.openSourceReader(this.readAll())
return SourceReader.from(this.readAll())
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@ import korlibs.io.lang.Charsets

object KsoupEngineImpl : KsoupEngine {

override fun openSourceReader(content: String, charset: Charset?): SourceReader {
return SourceReader.from(charset?.toByteArray(content) ?: content.encodeToByteArray())
}

override fun openSourceReader(byteArray: ByteArray): SourceReader {
return SourceReader.from(byteArray)
}

override fun getUtf8Charset(): Charset {
return CharsetImpl(Charsets.UTF8)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import korlibs.io.file.VfsFile
import korlibs.io.stream.*


fun SourceReader.Companion.from(byteArray: ByteArray): SourceReader = SourceReaderImpl(byteArray)
fun SourceReader.Companion.from(syncStream: SyncStream): SourceReader = SourceReaderImpl(syncStream)
suspend fun SourceReader.Companion.from(asyncInputStream: AsyncInputStream): SourceReader =
SourceReaderImpl(asyncInputStream.toAsyncStream().toSyncOrNull() ?: asyncInputStream.readAll().openSync())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ import io.ktor.utils.io.charsets.*

object KsoupEngineImpl : KsoupEngine {

override fun openSourceReader(content: String, charset: Charset?): SourceReader {
return SourceReader.from(charset?.toByteArray(content) ?: content.encodeToByteArray())
}

override fun openSourceReader(byteArray: ByteArray): SourceReader {
return SourceReader.from(byteArray)
}

override fun getUtf8Charset(): Charset {
return CharsetImpl(Charsets.UTF_8)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import kotlinx.io.Source
import kotlinx.io.files.Path


fun SourceReader.Companion.from(byteArray: ByteArray): SourceReader = SourceReaderImpl(byteArray)
fun SourceReader.Companion.from(source: Source): SourceReader = SourceReaderImpl(source)
fun SourceReader.Companion.from(bodyChannel: ByteReadChannel): SourceReader = SourceReaderImpl(bodyChannel.readBuffer)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ import io.ktor.utils.io.charsets.*

object KsoupEngineImpl : KsoupEngine {

override fun openSourceReader(content: String, charset: Charset?): SourceReader {
return SourceReader.from(charset?.toByteArray(content) ?: content.encodeToByteArray())
}

override fun openSourceReader(byteArray: ByteArray): SourceReader {
return SourceReader.from(byteArray)
}

override fun getUtf8Charset(): Charset {
return CharsetImpl(Charsets.UTF_8)
}
Expand Down
2 changes: 0 additions & 2 deletions ksoup-engine-ktor2/src/com/fleeksoft/ksoup/io/SourceExt.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ import kotlinx.io.Source
import kotlinx.io.files.Path


fun SourceReader.Companion.from(byteArray: ByteArray): SourceReader = SourceReaderImpl(byteArray)
fun SourceReader.Companion.from(source: Source): SourceReader = SourceReaderImpl(source)


fun FileSource.Companion.from(file: Path): FileSource = FileSourceImpl(file)
fun FileSource.Companion.from(filePath: String): FileSource = FileSourceImpl(filePath)
37 changes: 37 additions & 0 deletions ksoup-engine-lite/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
plugins {
    alias(libs.plugins.mavenPublish)
}

// Maven coordinates shared by the project settings and the publishing configuration below.
val publishedGroup = "com.fleeksoft.ksoup"
val publishedVersion = libs.versions.libraryVersion.get()
val artifactId = "ksoup-engine-lite"

group = publishedGroup
version = publishedVersion

// Publication metadata for the ksoup-engine-lite artifact.
mavenPublishing {
    coordinates(publishedGroup, artifactId, publishedVersion)
    pom {
        name.set(artifactId)
        description.set("Ksoup is a Kotlin Multiplatform library for working with HTML and XML, and offers an easy-to-use API for URL fetching, data parsing, extraction, and manipulation using DOM and CSS selectors.")
        licenses {
            license {
                name.set("Apache-2.0")
                url.set("https://opensource.org/licenses/Apache-2.0")
            }
        }
        url.set("https://github.com/fleeksoft/ksoup")
        issueManagement {
            system.set("Github")
            url.set("https://github.com/fleeksoft/ksoup/issues")
        }
        scm {
            connection.set("https://github.com/fleeksoft/ksoup.git")
            url.set("https://github.com/fleeksoft/ksoup")
        }
        developers {
            developer {
                name.set("Sabeeh Ul Hussnain Anjum")
                email.set("fleeksoft@gmail.com")
                organization.set("Fleek Soft")
            }
        }
    }
}
11 changes: 11 additions & 0 deletions ksoup-engine-lite/module.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Module descriptor for ksoup-engine-lite: a library built for the platforms listed below.
product:
type: lib
platforms: [ jvm, js, wasm, android, linuxX64, linuxArm64, tvosArm64, tvosX64, tvosSimulatorArm64, macosX64, macosArm64, iosArm64, iosSimulatorArm64, iosX64, mingwX64 ]

# Settings shared across modules come from the common template one directory up.
apply: [ ../common.module-template.yaml ]

# jvmAndAndroid groups the jvm and android platforms under a single name.
aliases:
- jvmAndAndroid: [ jvm, android ]

# The only dependency declared here is the sibling ksoup-engine-common module.
dependencies:
- ../ksoup-engine-common
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.fleeksoft.ksoup.engine

import com.fleeksoft.ksoup.io.*

/**
 * [KsoupEngine] for the lite variant: charsets are provided by [CharsetImpl],
 * and file sources are not available.
 */
object KsoupEngineImpl : KsoupEngine {

    /** Returns a [CharsetImpl] created for the name "UTF-8". */
    override fun getUtf8Charset(): Charset = CharsetImpl("UTF-8")

    /** Returns a [CharsetImpl] for [name]; [CharsetImpl] rejects names it does not accept. */
    override fun charsetForName(name: String): Charset = CharsetImpl(name)

    /** Always fails via [TODO]: file sources are not supported by the lite engine. */
    override fun pathToFileSource(path: String): FileSource = TODO("File Source not supported in lite")
}
57 changes: 57 additions & 0 deletions ksoup-engine-lite/src/com/fleeksoft/ksoup/io/CharsetImpl.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package com.fleeksoft.ksoup.io

import kotlin.math.max


/**
 * Dependency-free [Charset] used by the lite engine.
 *
 * Accepts the names `utf8`, `utf-8`, `iso-8859-1`, `ascii` and `us-ascii` (case-insensitive)
 * and rejects every other name. Whatever name is accepted, conversion is done with Kotlin's
 * built-in [decodeToString] / [encodeToByteArray].
 * NOTE(review): bytes >= 0x80 of ISO-8859-1 input are therefore not decoded as Latin-1 —
 * confirm this is acceptable for callers.
 *
 * @throws IllegalArgumentException if [name] is not one of the accepted names.
 */
class CharsetImpl(override val name: String) : Charset {
    // Normalised once here instead of lower-casing the name on every decode call.
    private val normalizedName: String = name.lowercase()
    private val isUtf8: Boolean = normalizedName == "utf-8" || normalizedName == "utf8"

    init {
        require(
            isUtf8 ||
                normalizedName == "iso-8859-1" ||
                normalizedName == "ascii" ||
                normalizedName == "us-ascii"
        ) {
            "Charset $name not supported"
        }
    }

    /** Always `true` for this implementation. */
    override fun onlyUtf8(): Boolean = true

    /**
     * Decodes `byteArray[start, end)` and appends the text to [stringBuilder].
     *
     * For UTF-8 the last (up to) four bytes of the range are inspected. If a multi-byte
     * sequence starts there but would extend past [end], decoding stops before it so the
     * caller can retry once more bytes are available.
     *
     * @return the number of bytes consumed from [start]; 0 when the range is empty.
     */
    override fun decode(stringBuilder: StringBuilder, byteArray: ByteArray, start: Int, end: Int): Int {
        if (end <= start) return 0
        var incompleteByteIndex = -1

        if (isUtf8) {
            // TODO: maybe this check can be used for other charsets as well.
            // Never scan before `start`: bytes outside the requested range must not affect the result.
            var i = max(start, end - 4)
            while (i < end) {
                val byteLength = guessByteSequenceLength(byteArray[i])
                if (byteLength > 1 && (i + byteLength) > end) {
                    incompleteByteIndex = i
                    break
                }
                // Continuation bytes report length 0; always step at least one byte.
                i += max(byteLength, 1)
            }
        }

        // An incomplete sequence found at index 0 is not split off (the condition is `> 0`),
        // so the whole range is decoded in that case.
        // NOTE(review): presumably this guarantees progress — confirm against the caller.
        val toDecodeSize = if (incompleteByteIndex > 0) incompleteByteIndex else end

        // Decode the range directly rather than copying it into a temporary array first.
        stringBuilder.append(byteArray.decodeToString(start, toDecodeSize))
        return toDecodeSize - start
    }

    /**
     * Returns the total length a UTF-8 sequence would have judging by the high four bits
     * of its first byte, or 0 when [byte] is not a lead byte (i.e. the bits are `10xx`).
     */
    private fun guessByteSequenceLength(byte: Byte): Int {
        return when ((byte.toInt() and 0xFF) shr 4) {
            in 0b0000..0b0111 -> 1
            in 0b1100..0b1101 -> 2
            0b1110 -> 3
            0b1111 -> 4
            else -> 0
        }
    }

    /** Encodes [value] with [encodeToByteArray], independent of [name]. */
    override fun toByteArray(value: String): ByteArray {
        return value.encodeToByteArray()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.fleeksoft.ksoup.io

import java.io.File
import java.io.InputStream

// TODO: on the JVM the lite module could use a streaming API instead of buffering the whole stream.
/**
 * Creates a [SourceReader] by eagerly reading [inputStream] to its end into memory.
 *
 * Uses Kotlin's `readBytes()` rather than `InputStream.readAllBytes()`, which is only
 * available on Java 9+ and recent Android API levels. The stream is not closed here.
 */
fun SourceReader.Companion.from(inputStream: InputStream): SourceReader = SourceReader.from(inputStream.readBytes())
/** Always fails via [TODO]: the lite module does not support file sources. */
fun FileSource.Companion.from(file: File): FileSource {
    TODO("File Source not supported in lite")
}
/** Always fails via [TODO]: the lite module does not support file sources, including by path string. */
fun FileSource.Companion.from(file: String): FileSource {
    TODO("File Source not supported in lite")
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ import io.ktor.utils.io.charsets.*

object KsoupEngineImpl : KsoupEngine {

override fun openSourceReader(content: String, charset: Charset?): SourceReader {
return SourceReader.from(charset?.toByteArray(content) ?: content.encodeToByteArray())
}

override fun openSourceReader(byteArray: ByteArray): SourceReader {
return SourceReader.from(byteArray)
}

override fun getUtf8Charset(): Charset {
return CharsetImpl(Charsets.UTF_8)
}
Expand Down
1 change: 0 additions & 1 deletion ksoup-engine-okio/src/com/fleeksoft/ksoup/io/SourceExt.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import okio.Path
import okio.Source


fun SourceReader.Companion.from(byteArray: ByteArray): SourceReader = SourceReaderImpl(byteArray)
fun SourceReader.Companion.from(source: Source): SourceReader = SourceReaderImpl(source)


Expand Down
Loading
Loading