Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Facebook sanitizer for mobile domain and story URLs #246

Merged
merged 1 commit into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

package com.svenjacobs.app.leon.core.common.domain

/**
 * Checks whether this string (expected to be a URL) starts with the given [domain].
 *
 * An optional `http://` or `https://` scheme and an optional `www.` prefix are skipped before
 * [domain] is compared. Any characters on the same line may follow the domain.
 *
 * @param domain Domain to look for. Treated as literal text unless [isRegex] is `true`.
 * @param isRegex When `true`, [domain] is inserted into the pattern unchanged as a regular
 * expression fragment.
 * @return `true` if the whole string matches the resulting pattern
 */
fun String.matchesDomain(domain: String, isRegex: Boolean = false): Boolean {
    // Quote the complete literal instead of only its dots, so that other metacharacters
    // (`+`, `?`, `(`, ...) in a literal domain cannot change the meaning of the pattern.
    val domainPattern = if (isRegex) domain else Regex.escape(domain)
    return Regex("^(?:https?://)?(?:www\\.)?$domainPattern.*").matches(this)
}
/**
 * Checks whether this string starts with a domain described by the regular expression
 * fragment [domain].
 *
 * The fragment is embedded unchanged after an optional `http://` / `https://` scheme and an
 * optional `www.` prefix; the rest of the line may contain anything.
 *
 * @param domain Regular expression fragment describing the domain (and optionally a path)
 * @return `true` if the whole string matches the assembled pattern
 */
fun String.matchesDomainRegex(domain: String): Boolean {
    val pattern = "^(?:https?://)?(?:www\\.)?$domain.*"
    return pattern.toRegex().matches(this)
}

/**
 * Checks whether this string starts with the literal [domain], ignoring an optional scheme and
 * `www.` prefix (see [matchesDomainRegex] for the pattern that is applied).
 *
 * The whole of [domain] is quoted, not just its dots, so that any other regex metacharacter
 * it may contain is matched literally as well.
 *
 * @param domain Domain to look for, as plain text
 * @return `true` if this string matches
 */
fun String.matchesDomain(domain: String): Boolean = matchesDomainRegex(Regex.escape(domain))
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -26,7 +26,7 @@ object RegexFactory {
// Matches the first `?` and everything that follows it on the same line.
val AllParameters: Regex = "\\?.*".toRegex()

/**
* Returns a regex string which matches a certain parameter.
* Returns a Regex which matches a certain parameter.
*
* For example `ofParameter("abc")` returns a Regex which matches `?abc=` or `&abc=`.
*
Expand All @@ -36,7 +36,7 @@ object RegexFactory {
fun ofParameter(parameter: String): Regex {
    // Separator (`?` or `&`), the parameter name, `=`, then the value captured up to `&` or `#`.
    val pattern = "[?&](?:$parameter)=([^&#]*)"
    return pattern.toRegex()
}

/**
* Returns a regex string which matches a certain parameter prefix.
* Returns a Regex which matches a certain parameter prefix.
*
* For example `ofWildcardParameter("abc_")` returns a Regex which matches `?abc_x=`,
* `&abc_y=`, `&abc_zzz=` et cetera.
Expand All @@ -45,4 +45,14 @@ object RegexFactory {
*/
@Suppress("RegExpUnnecessaryNonCapturingGroup")
fun ofWildcardParameter(parameter: String): Regex {
    // Separator, the prefix, any remaining name characters up to `=`, then the value
    // captured up to `&` or `#`.
    val pattern = "[?&](?:$parameter)[^=]*=([^&#]*)"
    return Regex(pattern)
}

/**
 * Returns a Regex which matches every query parameter except the specified one, so that
 * removing all matches keeps only that parameter.
 *
 * A match starts at `?` or `&`, must not be followed by `parameter=` and extends up to (but
 * not including) the next `&`. Because the match only stops at `&`, a `#fragment` following
 * the last removed parameter is part of the match.
 *
 * To keep several parameters, pass them as a regular expression group using OR (`|`), for
 * example `(a|b)`.
 *
 * NOTE(review): if matches are replaced with an empty string, dropping the first parameter
 * while a later one is kept leaves the kept one introduced by `&` instead of `?`. Confirm
 * whether callers need to handle this.
 *
 * @param parameter Name of the parameter (or group of names) that must NOT be matched
 */
fun exceptParameter(parameter: String): Regex {
    val pattern = "[?&](?!$parameter=)[^&]+"
    return pattern.toRegex()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ class DomainExtensionsTest : WordSpec(
}

"match domain with regular expression values" {
"https://aliexpress.com/item/32948511896".matchesDomain(
"https://aliexpress.com/item/32948511896".matchesDomainRegex(
domain = "aliexpress\\..+/item/",
isRegex = true,
) shouldBe true
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.aliexpress

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,6 +36,5 @@ class AliexpressSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_aliexpress_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("aliexpress\\..+/item/", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("aliexpress\\..+/item/")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.amazon

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -40,5 +40,5 @@ class AmazonSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_amazon_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("amazon\\..+/", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("amazon\\..+/")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.ebay

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,5 +36,5 @@ class EbaySanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_ebay_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("ebay\\..+/itm/", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("ebay\\..+/itm/")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.facebook

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

class FacebookSanitizer : RegexSanitizer(
regex = RegexFactory.AllParameters,
regex = RegexFactory.exceptParameter("(id|story_fbid)"),
) {

override val id = SanitizerId("facebook_com")
Expand All @@ -36,5 +36,7 @@ class FacebookSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_facebook_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("facebook.com")
// Accepts facebook.com as well as the mobile host m.facebook.com.
// The pattern is used as a raw regular expression, so the dot before "com" is escaped too;
// left unescaped it would match any character (e.g. "facebookXcom").
override fun matchesDomain(input: String) = input.matchesDomainRegex(
    domain = "(m\\.)?facebook\\.com",
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.georiot

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -37,5 +37,5 @@ class GeoRiotSanitizer : SearchResultSanitizer(
)

override fun matchesDomain(input: String) =
input.matchesDomain("target.georiot\\.[^/]+/Proxy.ashx", isRegex = true)
input.matchesDomainRegex("target.georiot\\.[^/]+/Proxy.ashx")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.google

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -36,6 +36,5 @@ class GoogleSearchSanitizer : SearchResultSanitizer(
name = context.getString(R.string.sanitizer_google_search_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("google\\.[^/]+/url", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("google\\.[^/]+/url")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.linksynergy

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -36,6 +36,5 @@ class LinkSynergySanitizer : SearchResultSanitizer(
name = context.getString(R.string.sanitizer_linksynergy_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("linksynergy\\.[^/]+/link", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("linksynergy\\.[^/]+/link")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.netflix

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,8 +36,7 @@ class NetflixSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_netflix_name),
)

override fun matchesDomain(input: String) = input.matchesDomain(
override fun matchesDomain(input: String) = input.matchesDomainRegex(
domain = "(help\\.)?netflix.com",
isRegex = true,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.spotify

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,6 +36,5 @@ class SpotifySanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_spotify_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("(open\\.)?spotify\\.com", isRegex = true)
override fun matchesDomain(input: String) = input.matchesDomainRegex("(open\\.)?spotify\\.com")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.x

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,8 +36,7 @@ class XSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_x_name),
)

override fun matchesDomain(input: String) = input.matchesDomain(
override fun matchesDomain(input: String) = input.matchesDomainRegex(
domain = "(twitter|x)\\.com",
isRegex = true,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,22 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.youtube

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

class YoutubeSanitizer : Sanitizer {
class YoutubeSanitizer : RegexSanitizer(
RegexFactory.exceptParameter("(v|search_query)"),
) {

override val id = SanitizerId("youtube")

override fun getMetadata(context: Context) = Sanitizer.Metadata(
name = context.getString(R.string.sanitizer_youtube_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("(m\\.)?youtube\\.com", isRegex = true)

override fun invoke(input: String) = PARAMS_REGEX.replace(input, "")

private companion object {
private val PARAMS_REGEX = Regex("[?&](?!(v|search_query)=)[^&]+")
}
override fun matchesDomain(input: String) = input.matchesDomainRegex("(m\\.)?youtube\\.com")
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,30 @@ class FacebookSanitizerTest : WordSpec(

"invoke" should {

"clean facebook.com URLs" {
"clean facebook.com reel URLs" {
sanitizer("https://www.facebook.com/reel/1242384407160280?sfnsn=scwspmo") shouldBe
"https://www.facebook.com/reel/1242384407160280"
}

"clean m.facebook.com story URLs" {
sanitizer(
"https://m.facebook.com/story.php?story_fbid=pfbid0HqS6zLZvNrQt6ACvjv3h" +
"Kq6khpVse437nWSq2jBifKRD5sVH2XRLC3zz8aA7TKkWl&id=4&sfnsn=wiwspmo&mibext" +
"id=XzsMCV",
) shouldBe "https://m.facebook.com/story.php?story_fbid=pfbid0HqS6zLZvNrQt6ACvjv" +
"3hKq6khpVse437nWSq2jBifKRD5sVH2XRLC3zz8aA7TKkWl&id=4"
}
}

"matchesDomain" should {

"match facebook.com" {
sanitizer.matchesDomain("https://facebook.com") shouldBe true
}

"match m.facebook.com" {
sanitizer.matchesDomain("https://m.facebook.com") shouldBe true
}
}
},
)