From 68cf08c500d2bbacc325fecb14834381d7541dfe Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Tue, 9 Jan 2024 23:30:13 +0100
Subject: [PATCH 1/8] Add website handler 'Shein'

Add website handler 'Shein'
Add key 'shein' to dictionary 'SUPPORTED_DOMAINS'
Add url 'https://us.shein.com/' to test_add_product
---
 scraper/domains.py        | 22 ++++++++++++++++++++++
 tests/test_add_product.py |  1 +
 2 files changed, 23 insertions(+)

diff --git a/scraper/domains.py b/scraper/domains.py
index 68fa14bf..dd988d83 100644
--- a/scraper/domains.py
+++ b/scraper/domains.py
@@ -515,6 +515,27 @@ def get_short_url(self) -> str:
 
 
 def get_website_name(url: str, keep_tld=False, keep_http=False, keep_www=False) -> str:
+class Shein(BaseWebsiteHandler):
+    def _get_common_data(self) -> None:
+        script_data_raw = self.request_data.find_all("script", type="application/ld+json")[1].text
+        self.script_json = json.loads(script_data_raw)
+
+    def _get_product_name(self) -> str:
+        return self.script_json.get("name")
+
+    def _get_product_price(self) -> float:
+        return float(self.script_json.get("offers").get("price"))
+
+    def _get_product_currency(self) -> str:
+        return self.script_json.get("offers").get("priceCurrency")
+
+    def _get_product_id(self) -> str:
+        return self.script_json.get("sku")
+
+    def get_short_url(self) -> str:
+        return self.url
+
+
     stripped_url = url if keep_http else url.removeprefix("https://").removeprefix("http://")
     stripped_url = stripped_url if keep_www else stripped_url.replace("www.", "", 1)
     domain = "/".join(stripped_url.split("/")[0:3]) if keep_http else stripped_url.split("/")[0]
@@ -553,4 +574,5 @@ def get_website_handler(url: str) -> BaseWebsiteHandler:
     "sharkgaming": SharkGamingHandler,
     "newegg": NeweggHandler,
     "hifiklubben": HifiKlubbenHandler,
+    "shein": Shein,
 }
diff --git a/tests/test_add_product.py b/tests/test_add_product.py
index 14cd2f3a..1c3aea67 100644
--- a/tests/test_add_product.py
+++ b/tests/test_add_product.py
@@ -22,6 +22,7 @@
     ("https://sharkgaming.dk/", does_not_raise()),
     ("https://www.newegg.com/", does_not_raise()),
     ("https://www.hifiklubben.dk/", does_not_raise()),
+    ("https://us.shein.com/", does_not_raise()),
     ("https://www.notsupported.com/", pytest.raises(WebsiteNotSupported)),
 ]
 

From 4c3227fafd0060b35054f4086b2d642013090502 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Tue, 9 Jan 2024 23:42:36 +0100
Subject: [PATCH 2/8] Fix misplaced get_website_name signature in domains.py

---
 scraper/domains.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scraper/domains.py b/scraper/domains.py
index dd988d83..cf82e2c0 100644
--- a/scraper/domains.py
+++ b/scraper/domains.py
@@ -514,7 +514,6 @@ def get_short_url(self) -> str:
         return f"{website}/{id}"
 
 
-def get_website_name(url: str, keep_tld=False, keep_http=False, keep_www=False) -> str:
 class Shein(BaseWebsiteHandler):
     def _get_common_data(self) -> None:
         script_data_raw = self.request_data.find_all("script", type="application/ld+json")[1].text
@@ -536,6 +535,7 @@ def get_short_url(self) -> str:
         return self.url
 
 
+def get_website_name(url: str, keep_tld=False, keep_http=False, keep_www=False) -> str:
     stripped_url = url if keep_http else url.removeprefix("https://").removeprefix("http://")
     stripped_url = stripped_url if keep_www else stripped_url.replace("www.", "", 1)
     domain = "/".join(stripped_url.split("/")[0:3]) if keep_http else stripped_url.split("/")[0]

From f1fcf5d6fdcd667a3fc7fdc5b861ae69e010731f Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Wed, 10 Jan 2024 17:45:39 +0100
Subject: [PATCH 3/8] Add Shein.com to supported websites in README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index ab0f208e..3605796e 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ This scraper can (so far) scrape prices on products from:
 - [Sharkgaming.dk](https://www.sharkgaming.dk/)
 - [Newegg.com](https://www.newegg.com/) & [Newegg.ca](https://www.newegg.ca/)
 - [HifiKlubben.dk](https://www.hifiklubben.dk/)
+- [Shein.com](https://www.us.shein.com/)
 
 ****OBS these Amazon domains should work: [.com](https://www.amazon.com/), [.ca](https://www.amazon.ca/), [.es](https://www.amazon.es/), [.fr](https://www.amazon.fr/), [.de](https://www.amazon.de/) and [.it](https://www.amazon.it/)<br/>
 The listed Amazon domains is from my quick testing with one or two products from each domain.<br/>

From 940b286ddc1ef528e77fe323f94a0985701af54e Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 12 Jan 2024 23:38:07 +0100
Subject: [PATCH 4/8] Add shein to WEBSITE_COLORS

---
 scraper/constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scraper/constants.py b/scraper/constants.py
index cd75cf58..44c913bb 100644
--- a/scraper/constants.py
+++ b/scraper/constants.py
@@ -18,6 +18,7 @@
     "sharkgaming": "midnightblue",
     "newegg": "#f7c20a",
     "hifiklubben": "#231f20",
+    "shein": "#ffed24",
 }
 
 URL_SCHEMES = ("http://", "https://")

From 33a6f562e49c5c2fad8fb8c33c4428b2e4f92750 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 12 Jan 2024 23:56:03 +0100
Subject: [PATCH 5/8] Delete duplicate Shein website handler

---
 scraper/domains.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/scraper/domains.py b/scraper/domains.py
index 1ed76583..e47fbd93 100644
--- a/scraper/domains.py
+++ b/scraper/domains.py
@@ -530,27 +530,6 @@ def get_short_url(self) -> str:
         return self.url
 
 
-class Shein(BaseWebsiteHandler):
-    def _get_common_data(self) -> None:
-        script_data_raw = self.request_data.find_all("script", type="application/ld+json")[1].text
-        self.script_json = json.loads(script_data_raw)
-
-    def _get_product_name(self) -> str:
-        return self.script_json.get("name")
-
-    def _get_product_price(self) -> float:
-        return float(self.script_json.get("offers").get("price"))
-
-    def _get_product_currency(self) -> str:
-        return self.script_json.get("offers").get("priceCurrency")
-
-    def _get_product_id(self) -> str:
-        return self.script_json.get("sku")
-
-    def get_short_url(self) -> str:
-        return self.url
-
-
 def get_website_name(url: str, keep_tld=False, keep_http=False, keep_www=False, keep_subdomain=True) -> str:
     stripped_url = url if keep_http else url.removeprefix("https://").removeprefix("http://")
 

From b07f4757295bfb8d9bed13ca6b861dd2f673c7b4 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 12 Jan 2024 23:57:01 +0100
Subject: [PATCH 6/8] Update link to Shein in README

Remove subdomain 'us' from Shein link
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3605796e..ddfa523e 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,7 @@ This scraper can (so far) scrape prices on products from:
 - [Sharkgaming.dk](https://www.sharkgaming.dk/)
 - [Newegg.com](https://www.newegg.com/) & [Newegg.ca](https://www.newegg.ca/)
 - [HifiKlubben.dk](https://www.hifiklubben.dk/)
-- [Shein.com](https://www.us.shein.com/)
+- [Shein.com](https://www.shein.com/)
 
 ****OBS these Amazon domains should work: [.com](https://www.amazon.com/), [.ca](https://www.amazon.ca/), [.es](https://www.amazon.es/), [.fr](https://www.amazon.fr/), [.de](https://www.amazon.de/) and [.it](https://www.amazon.it/)<br/>
 The listed Amazon domains is from my quick testing with one or two products from each domain.<br/>

From 9c1cb035983ba3af1a7b00679794943bfc52c27a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 27 Jan 2024 16:32:27 +0100
Subject: [PATCH 7/8] Rename class 'Shein' to 'SheinHandler'

---
 scraper/domains.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scraper/domains.py b/scraper/domains.py
index e47fbd93..56a4468f 100644
--- a/scraper/domains.py
+++ b/scraper/domains.py
@@ -509,7 +509,7 @@ def get_short_url(self) -> str:
         return f"{website}/{id}"
 
 
-class Shein(BaseWebsiteHandler):
+class SheinHandler(BaseWebsiteHandler):
     def _get_common_data(self) -> None:
         script_data_raw = self.request_data.find_all("script", type="application/ld+json")[1].text
         self.script_json = json.loads(script_data_raw)
@@ -584,5 +584,5 @@ def get_website_handler(url: str) -> BaseWebsiteHandler:
     "sharkgaming": SharkGamingHandler,
     "newegg": NeweggHandler,
     "hifiklubben": HifiKlubbenHandler,
-    "shein": Shein,
+    "shein": SheinHandler,
 }

From 467a74dbc90792960be973ca059a055330c35b2c Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 27 Jan 2024 16:32:45 +0100
Subject: [PATCH 8/8] Add website handler tests for SheinHandler

---
 tests/test_objects.json        |  6 ++++++
 tests/test_website_handlers.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/tests/test_objects.json b/tests/test_objects.json
index 5997d58b..0c65e877 100644
--- a/tests/test_objects.json
+++ b/tests/test_objects.json
@@ -95,6 +95,12 @@
       "expected_title": "SENNHEISER MOMENTUM 4 WIRELESS",
       "expected_id": "senmomentum4bk",
       "expected_currency": "DKK"
+    },
+    "shein": {
+      "link": "https://euqs.shein.com/Men-s-Letter-Print-Slim-Fit-Short-Sleeve-T-Shirt-p-28492178.html",
+      "expected_title": "Men's Letter Print Slim Fit Short Sleeve T-Shirt",
+      "expected_id": "sm2311284334246374",
+      "expected_currency": "EUR"
     }
   }
 }
diff --git a/tests/test_website_handlers.py b/tests/test_website_handlers.py
index 9914bc34..bcc9d92a 100644
--- a/tests/test_website_handlers.py
+++ b/tests/test_website_handlers.py
@@ -17,6 +17,7 @@
     ProshopHandler,
     SharkGamingHandler,
     HifiKlubbenHandler,
+    SheinHandler,
 )
 from scraper.models import Info
 
@@ -47,6 +48,7 @@ def read_json(filename: str) -> dict:
 sharkgaming_test = test_website_handlers_json["sharkgaming"]
 newegg_test = test_website_handlers_json["newegg"]
 hifiklubben_test = test_website_handlers_json["hifiklubben"]
+shein_test = test_website_handlers_json["shein"]
 
 
 class BaseTestWebsiteHandler(ABC):
@@ -560,3 +562,33 @@ def test_get_id(self) -> None:
         id = self.test_handler._get_product_id()
         assert isinstance(id, str)
         assert id == hifiklubben_test["expected_id"]
+
+
+class TestSheinHandler(BaseTestWebsiteHandler):
+    test_handler = SheinHandler(shein_test["link"])
+
+    def test_get_product_info(self, mocker) -> None:
+        mocker.patch("scraper.domains.BaseWebsiteHandler._request_product_data", return_value=self.test_handler.request_data)
+        actual = self.test_handler.get_product_info()
+        assert isinstance(actual, Info)
+        assert actual.valid
+
+    def test_get_name(self) -> None:
+        actual = self.test_handler._get_product_name().lower()
+        expected = shein_test["expected_title"].lower()
+        assert isinstance(actual, str)
+        assert actual == expected
+
+    def test_get_price(self) -> None:
+        price = self.test_handler._get_product_price()
+        assert isinstance(price, float)
+
+    def test_get_currency(self) -> None:
+        currency = self.test_handler._get_product_currency()
+        assert isinstance(currency, str)
+        assert currency == shein_test["expected_currency"]
+
+    def test_get_id(self) -> None:
+        id = self.test_handler._get_product_id()
+        assert isinstance(id, str)
+        assert id == shein_test["expected_id"]