code-yeongyu · code-yeongyu · Dec 4, 2022 · Dec 4, 2022
diff --git a/twitter_video_tools/twitter_crawler.py b/twitter_video_tools/twitter_crawler.py
@@ -29,15 +29,15 @@ def login(self, username: str, password: str, timeout: Optional[float] = 10000)
         self.page.get_by_label('Password').press('Enter')
         self.page.wait_for_url('https://twitter.com/home')
 
-    def get_all_liked_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]:
+    def get_all_liked_video_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]:
         """Get the username's all liked tweets
         Returns the list of links of liked tweets
         """
-        return self.get_liked_tweets_until(
+        return self.get_liked_video_tweets_until(
             username, 'nothing', scroll_timeout
         )    # 'nothing' was intended because the given `until_link` would be never found on the links list
 
-    def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout: float = 0.8) -> list[str]:
+    def get_liked_video_tweets_until(self, username: str, until_link: str, scroll_timeout: float = 0.8) -> list[str]:
         """Scrolling down the list of liked tweets until the given `until_link` found
         Returns the list of links of liked tweets
         """
@@ -57,7 +57,7 @@ def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout:
                 break
             previous_height = self.page_current_height
 
-            new_links = self._get_article_links_in_current_screen()
+            new_links = self._get_video_tweets_in_current_screen()
             links.extend(new_links)
             links = list(set(links))
 
@@ -70,7 +70,7 @@ def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout:
 
     def get_recent_liked_tweet(self, username: str) -> str:
         self._goto_liked_tweets(username)
-        return self._get_article_links_in_current_screen()[0]
+        return self._get_tweets_in_current_screen()[0]
 
     def get_video_of_tweet(self, link: str, timeout: Optional[float] = 10000) -> list[tuple[str, str]]:
         video_links: list[str] = []
@@ -97,7 +97,26 @@ def _goto_liked_tweets(self, username: str) -> None:
         self.page.goto(f'https://twitter.com/{username}/likes')
         self.page.wait_for_selector('article')
 
-    def _get_article_links_in_current_screen(self) -> list[str]:
+    def _get_video_tweets_in_current_screen(self) -> list[str]:
+        links: list[str] = []
+        # Build one link per <article> that currently matches and contains a <video> element.
+        while True:    # no attempt cap: repeats until one full pass over the articles succeeds
+            articles = self.page.locator('article:has(video)')
+            article_length = articles.count()
+            try:
+                links = [    # rebuilt from scratch on every attempt; uses the a-under-div match at index 3
+                    'https://twitter.com' +    # a falsy href leaves just this prefix as the entry
+                    (articles.nth(i).locator('div').locator('a').nth(3).get_attribute('href', timeout=500) or '')
+                    for i in range(article_length)
+                ]
+                break
+            except Error:    # if articles in the page are not reachable (e.g. the 500 timeout expired)
+                self.page.mouse.wheel(0, 500)    # scroll by +500 to refresh the articles
+                self.page.mouse.wheel(0, -500)    # then scroll by -500, the opposite direction
+
+        return links
+
+    def _get_tweets_in_current_screen(self) -> list[str]:
         links: list[str] = []
 
         while True: