From 7e2deb9f8b8421e4a94006940d31ad8c21519196 Mon Sep 17 00:00:00 2001 From: Josiah Campbell <9521010+jocmp@users.noreply.github.com> Date: Sun, 12 Jan 2025 14:54:16 -0600 Subject: [PATCH] Parse url query param from Google Alerts feeds --- capy/src/main/java/com/jocmp/capy/Account.kt | 2 +- .../jocmp/capy/accounts/LocalOkHttpClient.kt | 3 --- .../jocmp/capy/accounts/local/ArticleURL.kt | 27 +++++++++++++++++++ .../{ => local}/LocalAccountDelegate.kt | 7 +++-- .../capy/accounts/{ => local}/ParsedItem.kt | 10 +++++-- .../test/java/com/jocmp/capy/OPMLFileTest.kt | 2 +- .../capy/accounts/local/ArticleURLTest.kt | 26 ++++++++++++++++++ .../{ => local}/LocalAccountDelegateTest.kt | 9 +++++-- .../accounts/{ => local}/ParsedItemTest.kt | 18 +++++++++++-- .../com/jocmp/capy/opml/OPMLImporterTest.kt | 2 +- 10 files changed, 92 insertions(+), 14 deletions(-) create mode 100644 capy/src/main/java/com/jocmp/capy/accounts/local/ArticleURL.kt rename capy/src/main/java/com/jocmp/capy/accounts/{ => local}/LocalAccountDelegate.kt (97%) rename capy/src/main/java/com/jocmp/capy/accounts/{ => local}/ParsedItem.kt (86%) create mode 100644 capy/src/test/java/com/jocmp/capy/accounts/local/ArticleURLTest.kt rename capy/src/test/java/com/jocmp/capy/accounts/{ => local}/LocalAccountDelegateTest.kt (96%) rename capy/src/test/java/com/jocmp/capy/accounts/{ => local}/ParsedItemTest.kt (81%) diff --git a/capy/src/main/java/com/jocmp/capy/Account.kt b/capy/src/main/java/com/jocmp/capy/Account.kt index 8be9884c..979e4b89 100644 --- a/capy/src/main/java/com/jocmp/capy/Account.kt +++ b/capy/src/main/java/com/jocmp/capy/Account.kt @@ -3,12 +3,12 @@ package com.jocmp.capy import com.jocmp.capy.accounts.AddFeedResult import com.jocmp.capy.accounts.AutoDelete import com.jocmp.capy.accounts.FaviconFetcher -import com.jocmp.capy.accounts.LocalAccountDelegate import com.jocmp.capy.accounts.LocalOkHttpClient import com.jocmp.capy.accounts.Source import com.jocmp.capy.accounts.asOPML import com.jocmp.capy.accounts.feedbin.FeedbinAccountDelegate import com.jocmp.capy.accounts.feedbin.FeedbinOkHttpClient +import com.jocmp.capy.accounts.local.LocalAccountDelegate import com.jocmp.capy.accounts.reader.buildReaderDelegate import com.jocmp.capy.articles.ArticleContent import com.jocmp.capy.articles.UnreadSortOrder diff --git a/capy/src/main/java/com/jocmp/capy/accounts/LocalOkHttpClient.kt b/capy/src/main/java/com/jocmp/capy/accounts/LocalOkHttpClient.kt index 8ee5694b..22d8a361 100644 --- a/capy/src/main/java/com/jocmp/capy/accounts/LocalOkHttpClient.kt +++ b/capy/src/main/java/com/jocmp/capy/accounts/LocalOkHttpClient.kt @@ -1,12 +1,9 @@ package com.jocmp.capy.accounts -import com.jocmp.capy.UserAgentInterceptor -import okhttp3.Cache import okhttp3.Interceptor import okhttp3.OkHttpClient import okhttp3.Response import okhttp3.brotli.BrotliInterceptor -import java.io.File import java.net.URI internal object LocalOkHttpClient { diff --git a/capy/src/main/java/com/jocmp/capy/accounts/local/ArticleURL.kt b/capy/src/main/java/com/jocmp/capy/accounts/local/ArticleURL.kt new file mode 100644 index 00000000..7fa0f9ad --- /dev/null +++ b/capy/src/main/java/com/jocmp/capy/accounts/local/ArticleURL.kt @@ -0,0 +1,27 @@ +package com.jocmp.capy.accounts.local + +import okhttp3.HttpUrl.Companion.toHttpUrlOrNull +import java.net.MalformedURLException +import java.net.URL + +internal object ArticleURL { + internal fun parse(url: URL): URL { + return googleAlertURL(url) ?: url + } + + private fun googleAlertURL(url: URL): URL? { + if (url.host != GOOGLE_ALERTS_DOMAIN) { + return null + } + + val articleURLParam = url.toHttpUrlOrNull()?.queryParameter("url") ?: return null + + return try { + URL(articleURLParam) + } catch (e: MalformedURLException) { + null + } + } + + private const val GOOGLE_ALERTS_DOMAIN = "www.google.com" +} diff --git a/capy/src/main/java/com/jocmp/capy/accounts/LocalAccountDelegate.kt b/capy/src/main/java/com/jocmp/capy/accounts/local/LocalAccountDelegate.kt similarity index 97% rename from capy/src/main/java/com/jocmp/capy/accounts/LocalAccountDelegate.kt rename to capy/src/main/java/com/jocmp/capy/accounts/local/LocalAccountDelegate.kt index 053864e3..ba2a6e50 100644 --- a/capy/src/main/java/com/jocmp/capy/accounts/LocalAccountDelegate.kt +++ b/capy/src/main/java/com/jocmp/capy/accounts/local/LocalAccountDelegate.kt @@ -1,8 +1,11 @@ -package com.jocmp.capy.accounts +package com.jocmp.capy.accounts.local import com.jocmp.capy.AccountDelegate import com.jocmp.capy.ArticleFilter import com.jocmp.capy.Feed +import com.jocmp.capy.accounts.AddFeedResult +import com.jocmp.capy.accounts.FaviconFetcher +import com.jocmp.capy.accounts.FeedOption import com.jocmp.capy.common.TimeHelpers.nowUTC import com.jocmp.capy.common.TimeHelpers.published import com.jocmp.capy.common.transactionWithErrorHandling @@ -23,7 +26,7 @@ import java.net.UnknownHostException import java.time.ZonedDateTime import com.jocmp.feedfinder.parser.Feed as ParserFeed -class LocalAccountDelegate( +internal class LocalAccountDelegate( private val database: Database, private val httpClient: OkHttpClient, private val faviconFetcher: FaviconFetcher, diff --git a/capy/src/main/java/com/jocmp/capy/accounts/ParsedItem.kt b/capy/src/main/java/com/jocmp/capy/accounts/local/ParsedItem.kt similarity index 86% rename from capy/src/main/java/com/jocmp/capy/accounts/ParsedItem.kt rename to capy/src/main/java/com/jocmp/capy/accounts/local/ParsedItem.kt index cdc7f6a6..f250c86e 100644 --- a/capy/src/main/java/com/jocmp/capy/accounts/ParsedItem.kt +++ b/capy/src/main/java/com/jocmp/capy/accounts/local/ParsedItem.kt @@ -1,4 +1,4 @@ -package com.jocmp.capy.accounts +package com.jocmp.capy.accounts.local import com.jocmp.capy.common.escapingHTMLCharacters import com.jocmp.rssparser.model.RssItem @@ -8,7 +8,7 @@ import java.net.URI import java.net.URL internal class ParsedItem(private val item: RssItem, private val siteURL: String?) { - val url: String? = cleanedURL(item.link)?.toString() + val url: String? = articleURL() val id: String? = url ?: item.guid @@ -40,6 +40,12 @@ internal class ParsedItem(private val item: RssItem, private val siteURL: String val imageURL: String? get() = cleanedURL(item.image)?.toString() + private fun articleURL(): String? { + val link = cleanedURL(item.link) ?: return null + + return ArticleURL.parse(link).toString() + } + private fun cleanedURL(inputURL: String?): URL? { val url = inputURL.orEmpty() diff --git a/capy/src/test/java/com/jocmp/capy/OPMLFileTest.kt b/capy/src/test/java/com/jocmp/capy/OPMLFileTest.kt index ac568b39..65d43b2b 100644 --- a/capy/src/test/java/com/jocmp/capy/OPMLFileTest.kt +++ b/capy/src/test/java/com/jocmp/capy/OPMLFileTest.kt @@ -1,7 +1,7 @@ package com.jocmp.capy import com.jocmp.capy.accounts.FakeFaviconFetcher -import com.jocmp.capy.accounts.LocalAccountDelegate +import com.jocmp.capy.accounts.local.LocalAccountDelegate import com.jocmp.capy.db.Database import com.jocmp.capy.fixtures.AccountFixture import com.jocmp.capy.fixtures.FeedFixture diff --git a/capy/src/test/java/com/jocmp/capy/accounts/local/ArticleURLTest.kt b/capy/src/test/java/com/jocmp/capy/accounts/local/ArticleURLTest.kt new file mode 100644 index 00000000..76306995 --- /dev/null +++ b/capy/src/test/java/com/jocmp/capy/accounts/local/ArticleURLTest.kt @@ -0,0 +1,26 @@ +package com.jocmp.capy.accounts.local + +import java.net.URL +import kotlin.test.Test +import kotlin.test.assertEquals + +class ArticleURLTest { + @Test + fun `entry URL is the same as article URL`() { + val entryURL = URL("https://www.theverge.com/2025/1/12/24340818/robot-vacuum-innovations-roborock-dreame-ecovacs-ces2025") + + val url = ArticleURL.parse(url = entryURL) + + assertEquals(expected = entryURL.toString(), actual = url.toString()) + } + + @Test + fun `with a Google Alert entry URL it returns the url param`() { + val entryURL = URL("https://www.google.com/url?rct=j&sa=t&url=https://www.androidheadlines.com/2025/01/meta-sued-for-allegedly-training-ai-with-content-from-pirated-books.html&ct=ga&cd=CAIyGjQ2MjY4NTIwYjAzMGNkMzc6Y29tOmVuOlVT&usg=AOvVaw2Ez54Yz16bwLLLX_YLfwA2") + val articleURL = URL("https://www.androidheadlines.com/2025/01/meta-sued-for-allegedly-training-ai-with-content-from-pirated-books.html") + + val url = ArticleURL.parse(url = entryURL) + + assertEquals(expected = articleURL.toString(), actual = url.toString()) + } +} diff --git a/capy/src/test/java/com/jocmp/capy/accounts/LocalAccountDelegateTest.kt b/capy/src/test/java/com/jocmp/capy/accounts/local/LocalAccountDelegateTest.kt similarity index 96% rename from capy/src/test/java/com/jocmp/capy/accounts/LocalAccountDelegateTest.kt rename to capy/src/test/java/com/jocmp/capy/accounts/local/LocalAccountDelegateTest.kt index 036f4d0f..c4d144f0 100644 --- a/capy/src/test/java/com/jocmp/capy/accounts/LocalAccountDelegateTest.kt +++ b/capy/src/test/java/com/jocmp/capy/accounts/local/LocalAccountDelegateTest.kt @@ -1,8 +1,10 @@ -package com.jocmp.capy.accounts +package com.jocmp.capy.accounts.local import com.jocmp.capy.AccountDelegate import com.jocmp.capy.ArticleFilter import com.jocmp.capy.InMemoryDatabaseProvider +import com.jocmp.capy.accounts.AddFeedResult +import com.jocmp.capy.accounts.FakeFaviconFetcher import com.jocmp.capy.db.Database import com.jocmp.capy.fixtures.FeedFixture import com.jocmp.capy.logging.CapyLog @@ -118,7 +120,10 @@ class LocalAccountDelegateTest { FeedFixture(database).create(feedID = channel.link!!) - delegate.refresh(ArticleFilter.default(), cutoffDate = ZonedDateTime.of(2024, 5, 1, 8, 0, 0, 0, ZoneOffset.UTC)) + delegate.refresh( + ArticleFilter.default(), + cutoffDate = ZonedDateTime.of(2024, 5, 1, 8, 0, 0, 0, ZoneOffset.UTC) + ) val articlesCount = database .articlesQueries diff --git a/capy/src/test/java/com/jocmp/capy/accounts/ParsedItemTest.kt b/capy/src/test/java/com/jocmp/capy/accounts/local/ParsedItemTest.kt similarity index 81% rename from capy/src/test/java/com/jocmp/capy/accounts/ParsedItemTest.kt rename to capy/src/test/java/com/jocmp/capy/accounts/local/ParsedItemTest.kt index e593aede..c92efff7 100644 --- a/capy/src/test/java/com/jocmp/capy/accounts/ParsedItemTest.kt +++ b/capy/src/test/java/com/jocmp/capy/accounts/local/ParsedItemTest.kt @@ -1,4 +1,4 @@ -package com.jocmp.capy.accounts +package com.jocmp.capy.accounts.local import com.jocmp.rssparser.model.RssItem import kotlin.test.Test @@ -31,7 +31,10 @@ class ParsedItemTest { val item = RssItem.Builder().title(title).build() val parsedItem = ParsedItem(item, siteURL = "") - assertEquals(expected = "The `<details>` and `<summary>` elements are getting an upgrade", actual = parsedItem.title) + assertEquals( + expected = "The `<details>` and `<summary>` elements are getting an upgrade", + actual = parsedItem.title + ) } @Test @@ -116,4 +119,15 @@ class ParsedItemTest { assertEquals(expected = "https://example.com/article", actual = parsedItem.url) } + + @Test + fun url_withGoogleAlertsFeed() { + val articleURL = "https://www.androidcentral.com/apps-software/google-squashes-a-few-pixel-bugs-in-android-15-qpr2-beta-2-1" + val link = "https://www.google.com/url?rct=j&sa=t&url=$articleURL&ct=ga&cd=CAIyGmNmNDdiZGVhOWNiNDUxZTA6Y29tOmVuOlVT&usg=AOvVaw0NIyLHLSRUIwSMg9anVWrG" + + val item = RssItem.Builder().link(link).build() + val parsedItem = ParsedItem(item, siteURL = "https://www.google.com/alerts/feeds/12345/12345") + + assertEquals(expected = articleURL, actual = parsedItem.url) + } } diff --git a/capy/src/test/java/com/jocmp/capy/opml/OPMLImporterTest.kt b/capy/src/test/java/com/jocmp/capy/opml/OPMLImporterTest.kt index 4375a1f0..5ebab734 100644 --- a/capy/src/test/java/com/jocmp/capy/opml/OPMLImporterTest.kt +++ b/capy/src/test/java/com/jocmp/capy/opml/OPMLImporterTest.kt @@ -4,7 +4,7 @@ import com.jocmp.capy.Account import com.jocmp.capy.InMemoryDatabaseProvider import com.jocmp.capy.MockFeedFinder import com.jocmp.capy.accounts.FakeFaviconFetcher -import com.jocmp.capy.accounts.LocalAccountDelegate +import com.jocmp.capy.accounts.local.LocalAccountDelegate import com.jocmp.capy.db.Database import com.jocmp.capy.fixtures.AccountFixture import com.jocmp.capy.fixtures.GenericFeed