Skip to content

Commit

Permalink
Ensure XML parser uses response encoding (#672)
Browse files Browse the repository at this point in the history
  • Loading branch information
jocmp authored Jan 4, 2025
1 parent 88eff2f commit 7e266fc
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ internal class DefaultParser(

/**
 * Attempts to parse [input] as XML with Jsoup's XML parser.
 *
 * NOTE(review): this span is taken from a rendered diff, so the two `Jsoup.parse` lines below
 * appear to be the before/after of a single change (charset argument `null` vs.
 * `input.charset?.toString()`) rather than two consecutive calls — confirm against the real file.
 *
 * @param input source whose `inputStream()` is handed to Jsoup
 * @return the parsed [Document], or `null` when an [IOException] is thrown
 */
private fun tryXmlParse(input: ParserInput): Document? {
return try {
Jsoup.parse(input.inputStream(), null, "", JsoupParser.xmlParser())
Jsoup.parse(input.inputStream(), input.charset?.toString(), "", JsoupParser.xmlParser())
} catch (e: IOException) {
null
}
Expand Down
10 changes: 6 additions & 4 deletions rssparser/src/test/kotlin/com/jocmp/rssparser/BaseParserTest.kt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package com.jocmp.rssparser

import com.jocmp.rssparser.model.RssItem
import com.jocmp.rssparser.model.ItunesChannelData
import com.jocmp.rssparser.model.ItunesItemData
import com.jocmp.rssparser.model.RssChannel
import com.jocmp.rssparser.model.RssImage
import com.jocmp.rssparser.model.ItunesItemData
import com.jocmp.rssparser.model.ItunesChannelData
import com.jocmp.rssparser.model.RssItem
import kotlinx.coroutines.test.runTest
import java.nio.charset.Charset
import kotlin.test.BeforeTest
import kotlin.test.Test
import kotlin.test.assertEquals
Expand Down Expand Up @@ -38,14 +39,15 @@ abstract class BaseParserTest(
val articleCategories: List<String> = emptyList(),
val articleCommentsUrl: String? = null,
val articleItunesData: ItunesItemData? = null,
val charset: Charset? = null
) {

private lateinit var channel: RssChannel
private lateinit var article: RssItem

/**
 * Reads the feed at `feedPath` from the test resources, parses it with the parser built by
 * `ParserFactory.build()`, and stores the resulting channel and its first item in
 * [channel] and [article].
 *
 * NOTE(review): this span is taken from a rendered diff, so the two `val input` lines below
 * appear to be the before/after of a single change (the newer one forwards `charset`) —
 * confirm against the real file.
 */
@BeforeTest
fun setUp() = runTest {
val input = readFileFromResources(feedPath)
val input = readFileFromResources(feedPath, charset = charset)
channel = ParserFactory.build().parse(input)
article = channel.items[0]
}
Expand Down
6 changes: 4 additions & 2 deletions rssparser/src/test/kotlin/com/jocmp/rssparser/TestUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package com.jocmp.rssparser

import com.jocmp.rssparser.internal.ParserInput
import java.io.File
import java.nio.charset.Charset

/**
 * Reads the bytes of `src/test/resources/<resourceName>` and wraps them in a [ParserInput].
 *
 * NOTE(review): this span is taken from a rendered diff, so the duplicated `resourceName`
 * parameter lines and the two `return` lines appear to be before/after pairs of a single
 * change — confirm against the real file.
 *
 * @param resourceName file name appended to `src/test/resources/`
 * @param charset forwarded to [ParserInput] by the newer `return` line; defaults to `null`
 * @return a [ParserInput] built from the file's bytes
 */
internal fun readFileFromResources(
resourceName: String
resourceName: String,
charset: Charset? = null,
): ParserInput {
val file = File("src/test/resources/$resourceName")

return ParserInput(file.readBytes(), charset = null)
return ParserInput(file.readBytes(), charset = charset)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.jocmp.rssparser.rss

import com.jocmp.rssparser.BaseParserTest
import com.jocmp.rssparser.model.RssImage
import java.nio.charset.Charset

/**
 * Configures [BaseParserTest] for `feed-test-accents.xml`, passing ISO-8859-1 as the input
 * charset and supplying the expected channel and first-article values, which contain
 * accented characters.
 */
class XmlParserAccentsTest : BaseParserTest(
    feedPath = "feed-test-accents.xml",
    charset = Charset.forName("ISO-8859-1"),
    // Expected channel-level values.
    channelTitle = "UOL Noticias",
    channelLink = "http://noticias.uol.com.br/",
    channelImage = RssImage(url = "http://rss.i.uol.com.br/uol_rss.gif"),
    channelDescription = "Últimas Notícias",
    // Expected values for the article checked by the base test.
    articleTitle = "Artur Jorge não é mais técnico do Botafogo",
    articleLink = "https://noticias.uol.com.br/ultimas-noticias/afp/2025/01/03/artur-jorge-nao-e-mais-tecnico-do-botafogo.htm",
    articlePubDate = "Sex, 03 Jan 2025 23:47:02 -0300",
    articleDescription = "O português Artur Jorge não continuará como treinador do Botafogo, anunciou nesta sexta-feira (3) o atual campeão brasileiro e da Copa Libertadores.",
)
105 changes: 105 additions & 0 deletions rssparser/src/test/resources/feed-test-accents.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7e266fc

Please sign in to comment.