Skip to content

Commit

Permalink
Don't break CDATA titles for local feeds (#720)
Browse files Browse the repository at this point in the history
Fixes a parser issue where non-ASCII characters in CDATA titles were
turned into escaped numeric character references instead of plain text.

The fix is to sanitize titles with Jsoup.clean instead of the
HTML-escaping helper copied from Android's android.text.Html.
  • Loading branch information
jocmp authored Jan 14, 2025
1 parent 4bc6440 commit e555963
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ class NotificationHelper(
return
}

sendGroupNotification()

notifications.forEach {
sendNotification(it)
}

sendGroupNotification()
}

fun dismissNotifications(ids: List<String>) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.platform.LocalDensity
import androidx.compose.ui.platform.LocalHapticFeedback
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.AnnotatedString
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.text.fromHtml
import androidx.compose.ui.text.style.TextDirection
import androidx.compose.ui.text.style.TextOverflow
import androidx.compose.ui.tooling.preview.Preview
Expand Down Expand Up @@ -107,7 +105,7 @@ fun ArticleRow(
ArticleListItem(
headlineContent = {
Text(
AnnotatedString.fromHtml(article.title),
article.title,
fontWeight = FontWeight.Bold,
)
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.jocmp.capy.accounts.local

import com.jocmp.capy.common.escapingHTMLCharacters
import com.jocmp.rssparser.model.RssItem
import org.jsoup.Jsoup
import org.jsoup.safety.Safelist
Expand Down Expand Up @@ -35,7 +34,7 @@ internal class ParsedItem(private val item: RssItem, private val siteURL: String
}

val title: String
get() = Jsoup.parse(item.title.orEmpty()).text().escapingHTMLCharacters
get() = Jsoup.clean(item.title.orEmpty(), Safelist.none())

val imageURL: String?
get() = cleanedURL(item.image)?.toString()
Expand Down
48 changes: 0 additions & 48 deletions capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt
Original file line number Diff line number Diff line change
Expand Up @@ -29,51 +29,3 @@ val String.unescapingHTMLCharacters: String
.replace("&lt;", "<")
.replace("&gt;", ">")
}

/**
 * Returns an HTML escaped representation of the given plain text.
 *
 * Copied from android.text.Html, with the surrogate-pair decoding corrected
 * so that every supplementary code point gets the right numeric reference.
 */
val String.escapingHTMLCharacters: String
    get() {
        val out = StringBuilder()
        withinStyle(out, this, 0, this.length)
        return out.toString()
    }

/**
 * Appends the HTML-escaped form of `text[start, end)` to [out].
 *
 * - `<`, `>` and `&` become `&lt;`, `&gt;` and `&amp;`.
 * - A valid UTF-16 surrogate pair becomes a single decimal reference for the
 *   combined code point; a surrogate without a matching partner is dropped.
 * - Any other char above `~` (0x7E) or below space becomes `&#N;`.
 * - In a run of spaces, every space but the last becomes `&nbsp;`.
 *
 * @param out destination buffer
 * @param text text to escape
 * @param start index of the first char to escape (inclusive)
 * @param end index to stop at (exclusive)
 */
private fun withinStyle(out: StringBuilder, text: CharSequence, start: Int, end: Int) {
    var i = start
    while (i < end) {
        val c = text[i]

        when {
            c == '<' -> out.append("&lt;")
            c == '>' -> out.append("&gt;")
            c == '&' -> out.append("&amp;")
            c.code in 0xD800..0xDFFF -> {
                // Only a high surrogate followed by a low surrogate produces output.
                if (c.code < 0xDC00 && i + 1 < end) {
                    val d = text[i + 1]
                    if (d.code in 0xDC00..0xDFFF) {
                        i++
                        // The 0x10000 offset must be added, not OR-ed: the shifted
                        // high-surrogate bits reach bit 16 for U+20000 and above.
                        val codepoint = 0x010000 + ((c.code - 0xD800) shl 10) + (d.code - 0xDC00)
                        out.append("&#").append(codepoint).append(";")
                    }
                }
            }
            c.code > 0x7E || c < ' ' -> out.append("&#").append(c.code).append(";")
            c == ' ' -> {
                // Keep a single real space at the end of the run so text can still wrap.
                while (i + 1 < end && text[i + 1] == ' ') {
                    out.append("&nbsp;")
                    i++
                }

                out.append(' ')
            }
            else -> out.append(c)
        }
        i++
    }
}
25 changes: 21 additions & 4 deletions capy/src/test/java/com/jocmp/capy/accounts/local/ParsedItemTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class ParsedItemTest {

@Test
fun title_withNestedHTML() {
val title = "<mark>The `&lt;details&gt;` and `&lt;summary&gt;` elements are getting an upgrade</mark>"
val title =
"<mark>The `&lt;details&gt;` and `&lt;summary&gt;` elements are getting an upgrade</mark>"

val item = RssItem.Builder().title(title).build()
val parsedItem = ParsedItem(item, siteURL = "")
Expand All @@ -37,6 +38,19 @@ class ParsedItemTest {
)
}

// A CDATA-wrapped title containing non-ASCII characters should come back
// as trimmed plain text.
@Test
fun title_withNestedHTMLAndNonAsciiText() {
    val rawTitle = "<![CDATA[ 分析:美國五角大樓將騰訊列入涉軍名單的影響及信號 ]]>"
    val parsedItem = ParsedItem(RssItem.Builder().title(rawTitle).build(), siteURL = "")

    assertEquals(
        expected = "分析:美國五角大樓將騰訊列入涉軍名單的影響及信號",
        actual = parsedItem.title,
    )
}

@Test
fun title_whenNull() {
val item = RssItem.Builder().title(null).build()
Expand Down Expand Up @@ -122,11 +136,14 @@ class ParsedItemTest {

@Test
fun url_withGoogleAlertsFeed() {
val articleURL = "https://www.androidcentral.com/apps-software/google-squashes-a-few-pixel-bugs-in-android-15-qpr2-beta-2-1"
val link = "https://www.google.com/url?rct=j&sa=t&url=$articleURL&ct=ga&cd=CAIyGmNmNDdiZGVhOWNiNDUxZTA6Y29tOmVuOlVT&usg=AOvVaw0NIyLHLSRUIwSMg9anVWrG"
val articleURL =
"https://www.androidcentral.com/apps-software/google-squashes-a-few-pixel-bugs-in-android-15-qpr2-beta-2-1"
val link =
"https://www.google.com/url?rct=j&sa=t&url=$articleURL&ct=ga&cd=CAIyGmNmNDdiZGVhOWNiNDUxZTA6Y29tOmVuOlVT&usg=AOvVaw0NIyLHLSRUIwSMg9anVWrG"

val item = RssItem.Builder().link(link).build()
val parsedItem = ParsedItem(item, siteURL = "https://www.google.com/alerts/feeds/12345/12345")
val parsedItem =
ParsedItem(item, siteURL = "https://www.google.com/alerts/feeds/12345/12345")

assertEquals(expected = articleURL, actual = parsedItem.url)
}
Expand Down

0 comments on commit e555963

Please sign in to comment.