Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JSON Feed support #405

Merged
merged 1 commit into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions rssparser/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Gradle plugins applied to the rssparser module's build script.
plugins {
// Java library plugin, applied by id.
id("java-library")
// KSP plugin; its version is read from the `libs` version catalog. The `ksp(...)` dependency
// configuration used further down in this script (Moshi codegen) comes from this plugin.
id("com.google.devtools.ksp") version libs.versions.ksp
// Kotlin JVM plugin, resolved through the version catalog alias.
alias(libs.plugins.jetbrains.kotlin.jvm)
}

Expand All @@ -12,6 +13,9 @@ dependencies {
implementation(libs.okhttp.client)
implementation(libs.kotlinx.coroutines.core)
implementation(libs.jsoup)
implementation(libs.moshi)
implementation(libs.moshi.converter)
ksp(libs.moshi.kotlin.codegen)
testImplementation(kotlin("test"))
testImplementation(kotlin("test-common"))
testImplementation(kotlin("test-annotations-common"))
Expand Down
37 changes: 15 additions & 22 deletions rssparser/src/main/kotlin/com/prof18/rssparser/RssParser.kt
Original file line number Diff line number Diff line change
@@ -1,51 +1,44 @@
package com.prof18.rssparser

import com.prof18.rssparser.exception.RssParsingException
import com.prof18.rssparser.internal.XmlFetcher
import com.prof18.rssparser.internal.XmlParser
import com.prof18.rssparser.internal.Fetcher
import com.prof18.rssparser.internal.Parser
import com.prof18.rssparser.internal.ParserInput
import com.prof18.rssparser.model.RssChannel
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.withContext
import kotlin.coroutines.CoroutineContext

/**
 * Entry point that turns a feed URL or a raw feed string into an [RssChannel].
 *
 * NOTE(review): this span is rendered from a diff, so removed and added lines appear side by
 * side (for example the `xmlFetcher`/`xmlParser` and the `fetcher`/`parser` constructor
 * parameters, and two consecutive bodies inside the functions below). Confirm against the
 * merged file which lines are current before relying on any of them.
 */
class RssParser internal constructor(
private val xmlFetcher: XmlFetcher,
private val xmlParser: XmlParser,
private val fetcher: Fetcher,
private val parser: Parser,
) {

// Context used by every public suspend function of this class via withContext:
// a SupervisorJob combined with Dispatchers.Default.
private val coroutineContext: CoroutineContext =
SupervisorJob() + Dispatchers.Default

internal interface Builder {
/**
* Creates a [RssParser] object
*/
fun build(): RssParser
}

/**
* Downloads and parses an RSS feed from an [url] and returns an [RssChannel].
*
* If the parsing fails because the XML is malformed, it will re-download the XML as a string,
* clean it up and try to parse it again. If it fails again, it will throw an [RssParsingException].
*
* NOTE(review): the retry described above matches only the `xmlFetcher`/`xmlParser` lines in the
* body; the `fetcher.fetch` + `parser.parse` lines perform a single fetch and a single parse with
* no retry. Presumably the retry paragraph is stale once the old lines are dropped - confirm.
*/
suspend fun getRssChannel(url: String): RssChannel = withContext(coroutineContext) {
val parserInput = xmlFetcher.fetchXml(url)
return@withContext try {
xmlParser.parseXML(parserInput)
} catch (_: RssParsingException) {
val xmlAsString = xmlFetcher.fetchXmlAsString(url)
val input = xmlParser.generateParserInputFromString(xmlAsString)
xmlParser.parseXML(input)
}
val parserInput = fetcher.fetch(url)
return@withContext parser.parse(parserInput)
}

/**
* Parses an RSS feed provided by [rawRssFeed] and returns an [RssChannel]
*/
suspend fun parse(rawRssFeed: String): RssChannel = withContext(coroutineContext) {
val parserInput = xmlParser.generateParserInputFromString(rawRssFeed)
return@withContext xmlParser.parseXML(parserInput)
val parserInput = generateParserInputFromString(rawRssFeed)
return@withContext parser.parse(parserInput)
}

/**
* Builds a [ParserInput] from a raw feed string: the string is trimmed, encoded as UTF-8 and
* exposed as an input stream that is handed to [ParserInput.from].
*/
private fun generateParserInputFromString(rawRssFeed: String): ParserInput {
val cleanedXml = rawRssFeed.trim()
val inputStream = cleanedXml.byteInputStream(Charsets.UTF_8)
return ParserInput.from(inputStream)
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.prof18.rssparser

import com.prof18.rssparser.internal.DefaultXmlFetcher
import com.prof18.rssparser.internal.DefaultXmlParser
import com.prof18.rssparser.internal.DefaultFetcher
import com.prof18.rssparser.internal.DefaultParser
import kotlinx.coroutines.Dispatchers
import okhttp3.Call
import okhttp3.OkHttpClient
Expand All @@ -23,11 +23,10 @@ class RssParserBuilder(
override fun build(): RssParser {
val client = callFactory
return RssParser(
xmlFetcher = DefaultXmlFetcher(
fetcher = DefaultFetcher(
callFactory = client,
),
xmlParser = DefaultXmlParser(
charset = charset,
parser = DefaultParser(
dispatcher = Dispatchers.IO,
),
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package com.prof18.rssparser.internal

import com.prof18.rssparser.exception.HttpException
import kotlinx.coroutines.suspendCancellableCoroutine
import okhttp3.Call
import okhttp3.Callback
import okhttp3.Request
import okhttp3.Response
import java.io.IOException
import java.io.InputStream
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException

/**
 * [Fetcher] implementation that downloads a feed through an OkHttp [Call.Factory].
 *
 * @property callFactory factory used to create one HTTP call per [fetch] invocation.
 */
internal class DefaultFetcher(
    private val callFactory: Call.Factory,
) : Fetcher {
    /**
     * Requests [url] and returns the response body bytes wrapped in a [ParserInput].
     *
     * @throws HttpException when the response is not successful; carries the response code and message.
     * @throws IOException when the call fails or the response body cannot be read.
     */
    override suspend fun fetch(url: String): ParserInput {
        val request = createRequest(url)
        return callFactory.newCall(request).awaitForInputStream()
    }

    /** Builds a request for [url] with the builder's defaults. */
    private fun createRequest(url: String): Request =
        Request.Builder()
            .url(url)
            .build()

    /**
     * Enqueues this call and suspends until its callback fires.
     *
     * Cancelling the coroutine cancels the underlying call.
     */
    private suspend fun Call.awaitForInputStream(): ParserInput =
        suspendCancellableCoroutine { continuation ->
            continuation.invokeOnCancellation {
                cancel()
            }

            enqueue(object : Callback {
                override fun onResponse(call: Call, response: Response) {
                    // Anything thrown in here (e.g. an IOException while reading the body) must be
                    // delivered to the continuation; otherwise the suspended caller never resumes.
                    val outcome = runCatching {
                        // `use` closes the response on every path, including the error path.
                        response.use { openResponse ->
                            if (!openResponse.isSuccessful) {
                                throw HttpException(
                                    code = openResponse.code,
                                    message = openResponse.message,
                                )
                            }
                            val body = requireNotNull(openResponse.body)
                            ParserInput(body.bytes())
                        }
                    }
                    continuation.resumeWith(outcome)
                }

                override fun onFailure(call: Call, e: IOException) {
                    continuation.resumeWithException(e)
                }
            })
        }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package com.prof18.rssparser.internal

import com.prof18.rssparser.exception.RssParsingException
import com.prof18.rssparser.internal.atom.AtomFeedHandler
import com.prof18.rssparser.internal.atom.AtomKeyword
import com.prof18.rssparser.internal.json.JsonFeedHandler
import com.prof18.rssparser.internal.json.models.Feed
import com.prof18.rssparser.internal.rdf.RdfFeedHandler
import com.prof18.rssparser.internal.rdf.RdfKeyword
import com.prof18.rssparser.internal.rss.RssFeedHandler
import com.prof18.rssparser.internal.rss.RssKeyword
import com.prof18.rssparser.model.RssChannel
import com.squareup.moshi.Moshi
import com.squareup.moshi.adapter
import kotlinx.coroutines.CoroutineDispatcher
import kotlinx.coroutines.withContext
import okio.IOException
import okio.buffer
import okio.source
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.parser.Parser as JsoupParser

/**
 * [Parser] implementation that detects which feed format a [ParserInput] contains and builds an
 * [RssChannel] from the matching handler.
 *
 * @property dispatcher dispatcher on which format detection and channel building run.
 */
internal class DefaultParser(
    private val dispatcher: CoroutineDispatcher,
) : Parser {
    /**
     * Parses [input] into an [RssChannel].
     *
     * @throws RssParsingException when no handler can be found for the input.
     */
    override suspend fun parse(input: ParserInput): RssChannel =
        withContext(dispatcher) {
            val handler = findHandler(input)
                ?: throw RssParsingException(
                    message = "Could not find top-level RSS node",
                    cause = null,
                )

            handler.build()
        }

    /**
     * Picks the handler for [input].
     *
     * The input is first read as XML. Each child node of the document is checked in order: an
     * RSS, Atom or RDF tag name selects the matching handler, and any other tag name falls back to
     * the JSON attempt. If no node yields a handler, the JSON attempt is the final fallback.
     *
     * @return the selected handler, or `null` when the XML read fails or nothing matches.
     */
    private fun findHandler(input: ParserInput): FeedHandler? {
        val document = tryXmlParse(input) ?: return null

        // The JSON attempt depends only on `input`, so run it at most once and reuse the result
        // instead of decoding the whole input again for every unrecognised node.
        val jsonHandler by lazy(LazyThreadSafetyMode.NONE) { tryParseJson(input) }

        val matchedHandler = document.children().firstNotNullOfOrNull { node ->
            when (node.tagName()) {
                RssKeyword.Rss.value -> RssFeedHandler(document)
                AtomKeyword.Atom.value -> AtomFeedHandler(node)
                RdfKeyword.Rdf.value -> RdfFeedHandler(node)
                else -> jsonHandler
            }
        }

        return matchedHandler ?: jsonHandler
    }
}

/**
 * Reads [input] with Jsoup's XML parser, passing a `null` charset and an empty base URI.
 *
 * @return the parsed [Document], or `null` when an [IOException] is thrown while reading.
 */
private fun tryXmlParse(input: ParserInput): Document? =
    try {
        val stream = input.inputStream()
        val xmlParser = JsoupParser.xmlParser()
        Jsoup.parse(stream, null, "", xmlParser)
    } catch (ignored: IOException) {
        null
    }

/**
 * Moshi instance shared by every JSON decoding attempt, so it is built once rather than on
 * each call to [tryParseJson].
 */
private val jsonFeedMoshi: Moshi = Moshi.Builder().build()

/**
 * Attempts to decode [input] as a JSON [Feed].
 *
 * @return a [JsonFeedHandler] wrapping the decoded feed, or `null` when decoding yields `null`
 * or an [IOException] is thrown while reading.
 */
@OptIn(ExperimentalStdlibApi::class)
private fun tryParseJson(input: ParserInput): FeedHandler? =
    try {
        jsonFeedMoshi.adapter<Feed>()
            .fromJson(input.inputStream().source().buffer())
            ?.let { feed -> JsonFeedHandler(feed) }
    } catch (e: IOException) {
        null
    }

This file was deleted.

Loading
Loading