Skip to content

Commit

Permalink
fix: Amazon sanitizer for gp/product links
Browse files (browse the repository at this point in the history)
  • Loading branch information
svenjacobs committed Sep 12, 2023
1 parent aa37428 commit d8e0d0d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class AmazonProductSanitizer : Sanitizer {
}

private companion object {
// Pattern before this commit (the removed side of the diff): group 1 captures the optional
// scheme/`www.` prefix plus the amazon host; only a `/d/` or `/dp/` path segment is accepted,
// and group 2 captures everything up to the next `/` as the product ID.
private val REGEX = Regex("((?:https?://)?(?:www\\.)?amazon\\.[^/]*).*/dp?/([^/]*)")
// Pattern after this commit (the added side of the diff): additionally accepts a
// `/gp/product/` path segment, and the group 2 capture now also stops at `?` and `&`.
// NOTE(review): the `?` after the non-capturing group makes the whole path marker optional,
// so an empty segment (`//`) followed by an ID would match as well; confirm this is intended.
private val REGEX =
Regex("((?:https?://)?(?:www\\.)?amazon\\.[^/]*).*/(?:dp?|gp/product)?/([^/?&]*)")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class AmazonProductSanitizerTest : WordSpec(

"invoke" should {

"clean Amazon product link" {
"clean Amazon product link (dp)" {
var result = sanitizer(
"https://www.amazon.de/Xiaomi-Aktivit%C3%A4tstracker-Trainings-Puls%C3%" +
"BCberwachung-Akkulaufzeit/dp/B091G3FLL7/?_encoding=UTF8&pd_rd_w=xDcJP&pf" +
Expand All @@ -49,10 +49,17 @@ class AmazonProductSanitizerTest : WordSpec(
result shouldBe "https://www.amazon.co.uk/dp/B091G3FLL7/"
}

"clean Amazon product link (gp/product)" {
// In this input the product ID is followed directly by the query string (no trailing
// slash); the expected result below contains the ID without any query parameters.
val result = sanitizer(
"https://www.amazon.fr/gp/product/B0C9JKKL7N?tag=egcdealabs08-21&ascsubtag=1498016995",
)

// Expected output keeps the input's host and uses the `/dp/<id>/` form.
result shouldBe "https://www.amazon.fr/dp/B0C9JKKL7N/"
}

"clean Amazon cart product link" {
sanitizer(
// Removed side of the diff: the URL split across two concatenated string literals.
"https://www.amazon.com/gp/aw/d/B009EEZYE0/ref=ox_sc_act_image_1?smid=A" +
"TVPDKIKX0DER&psc=1",
// Added side of the diff: the same URL written as a single literal.
"https://www.amazon.com/gp/aw/d/B009EEZYE0/ref=ox_sc_act_image_1?smid=ATVPDKIKX0DER&psc=1",
) shouldBe "https://www.amazon.com/dp/B009EEZYE0/"
}

Expand Down

0 comments on commit d8e0d0d

Please sign in to comment.