Version 1.7.0 captcha solver

d60 · Jun 2, 2024 · 0404e25 · 0404e25
1 parent 119563c
commit 0404e25
Show file tree

Hide file tree

Showing 22 changed files with 1,164 additions and 489 deletions.
diff --git a/docs/twikit.rst b/docs/twikit.rst
@@ -131,6 +131,15 @@ Geo
    :show-inheritance:
    :member-order: bysource
 
+Capsolver
+-------------------
+
+.. automodule:: twikit._captcha.capsolver
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :member-order: bysource
+
 Utils
 -------------------
 

diff --git a/docs/twikit.twikit_async.rst b/docs/twikit.twikit_async.rst
@@ -125,7 +125,16 @@ Notification
 Geo
 -------------------
 
-.. automodule:: twikit.geo
+.. automodule:: twikit.twikit_async.geo
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :member-order: bysource
+
+Capsolver
+-------------------
+
+.. automodule:: twikit.twikit_async._captcha.capsolver
    :members:
    :undoc-members:
    :show-inheritance:

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 httpx
-fake_useragent
-filetype
+filetype
+beautifulsoup4
diff --git a/setup.py b/setup.py
@@ -13,8 +13,8 @@
     version=version,
     install_requires=[
         'httpx',
-        'fake_useragent',
-        'filetype'
+        'filetype',
+        'beautifulsoup4'
     ],
     python_requires='>=3.10',
     description='Twitter API wrapper for python with **no API key required**.',

diff --git a/twikit/__init__.py b/twikit/__init__.py
@@ -7,8 +7,9 @@
 A Python library for interacting with the Twitter API.
 """
 
-__version__ = '1.6.4'
+__version__ = '1.7.0'
 
+from ._captcha import Capsolver
 from .bookmark import BookmarkFolder
 from .client import Client
 from .community import (Community, CommunityCreator, CommunityMember,

diff --git a/twikit/_captcha/__init__.py b/twikit/_captcha/__init__.py
@@ -0,0 +1,2 @@
+from .base import CaptchaSolver
+from .capsolver import Capsolver
diff --git a/twikit/_captcha/base.py b/twikit/_captcha/base.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, NamedTuple
+
+from bs4 import BeautifulSoup
+from httpx import Response
+
+from twikit.utils import urlencode
+
+if TYPE_CHECKING:
+    from ..client import Client
+
+
+class UnlockHTML(NamedTuple):
+    # Snapshot of the fields that parse_unlock_html() extracts from the
+    # account unlock page.
+    #
+    # NOTE(review): parse_unlock_html() passes None for the two tokens and
+    # for `blob` when the matching element is missing from the page, so the
+    # `str` annotations below are narrower than the values actually stored.
+
+    # `value` of the <input name="authenticity_token"> element, if present.
+    authenticity_token: str
+    # `value` of the <input name="assignment_token"> element, if present.
+    assignment_token: str
+    # True when an <input id="verification_string"> element is on the page.
+    needs_unlock: bool
+    # True when an <input value="Start"> element is on the page.
+    start_button: bool
+    # True when an <input value="Continue to X"> element is on the page.
+    finish_button: bool
+    # True when an <input value="Delete"> element is on the page.
+    delete_button: bool
+    # Text following "data=" in the `src` of the element with
+    # id="arkose_iframe", if that element is present.
+    blob: str
+
+
+class CaptchaSolver:
+    """
+    Shared helpers for walking through the account unlock page.
+
+    Attributes
+    ----------
+    client : :class:`Client`
+        Client whose ``get`` / ``post`` methods carry out the requests.
+        Only annotated here; it is not assigned by this class.
+    max_attempts : :class:`int`
+        Only annotated here; no method of this class reads it.
+    """
+    client: Client
+    max_attempts: int
+
+    CAPTCHA_URL = 'https://twitter.com/account/access'
+    CAPTCHA_SITE_KEY = '0152B4EB-D2DC-460A-89A1-629838B529C9'
+
+    def get_unlock_html(self) -> tuple[Response, UnlockHTML]:
+        """
+        Request the unlock page and parse it.
+
+        Returns
+        -------
+        tuple[:class:`httpx.Response`, :class:`UnlockHTML`]
+            The second item returned by ``client.get`` and the fields
+            parsed from its ``text``.
+        """
+        request_headers = {
+            'X-Twitter-Client-Language': 'en-US',
+            'User-Agent': self.client._user_agent,
+            'Upgrade-Insecure-Requests': '1'
+        }
+        _, page = self.client.get(self.CAPTCHA_URL, headers=request_headers)
+        parsed = parse_unlock_html(page.text)
+        return page, parsed
+
+    def ui_metrix(self) -> str:
+        """
+        Fetch the ``ui_metrics`` script and return the first ``{...}``
+        literal that follows a ``return`` keyword in it.
+
+        Raises
+        ------
+        IndexError
+            If the script contains no such literal.
+        """
+        # presumably the first item returned by client.get is the response
+        # text -- it is handed straight to the regex below.
+        script, _ = self.client.get(
+            'https://twitter.com/i/js_inst?c_name=ui_metrics'
+        )
+        literals = re.findall(r'return ({.*?});', script, re.DOTALL)
+        return literals[0]
+
+    def confirm_unlock(
+        self,
+        authenticity_token: str,
+        assignment_token: str,
+        verification_string: str = None,
+        ui_metrics: bool = False
+    ) -> tuple[Response, UnlockHTML]:
+        """
+        Submit the unlock form and parse the page that comes back.
+
+        Parameters
+        ----------
+        authenticity_token : :class:`str`
+            Sent as the ``authenticity_token`` form field.
+        assignment_token : :class:`str`
+            Sent as the ``assignment_token`` form field.
+        verification_string : :class:`str`, default=None
+            When truthy, sent as ``verification_string`` together with
+            ``language_code=en`` and the ``lang=en`` query parameter.
+        ui_metrics : :class:`bool`, default=False
+            When True, the result of :meth:`ui_metrix` is sent as the
+            ``ui_metrics`` form field.
+
+        Returns
+        -------
+        tuple[:class:`httpx.Response`, :class:`UnlockHTML`]
+            The second item returned by ``client.post`` and the fields
+            parsed from its ``text``.
+        """
+        form = {
+            'authenticity_token': authenticity_token,
+            'assignment_token': assignment_token,
+            'lang': 'en',
+            'flow': '',
+        }
+        query = {}
+
+        if verification_string:
+            form.update(
+                verification_string=verification_string,
+                language_code='en'
+            )
+            query['lang'] = 'en'
+        if ui_metrics:
+            form['ui_metrics'] = self.ui_metrix()
+
+        # The form is encoded up front because the body is posted as a
+        # ready-made urlencoded string, matching the Content-Type below.
+        body = urlencode(form)
+        request_headers = {
+            'Content-Type': 'application/x-www-form-urlencoded',
+            'Upgrade-Insecure-Requests': '1',
+            'Referer': self.CAPTCHA_URL
+        }
+        _, page = self.client.post(
+            self.CAPTCHA_URL,
+            params=query,
+            data=body,
+            headers=request_headers
+        )
+        parsed = parse_unlock_html(page.text)
+        return page, parsed
+
+
+def parse_unlock_html(html: str) -> UnlockHTML:
+    """
+    Extract the unlock-flow fields from the account access page.
+
+    Parameters
+    ----------
+    html : :class:`str`
+        Markup of the unlock page.
+
+    Returns
+    -------
+    :class:`UnlockHTML`
+        ``authenticity_token``, ``assignment_token`` and ``blob`` are
+        ``None`` when the corresponding element (or the ``data=`` part of
+        the iframe ``src``) is missing.
+    """
+    # Local import: FeatureNotFound is only needed for the parser fallback.
+    from bs4 import FeatureNotFound
+
+    try:
+        soup = BeautifulSoup(html, 'lxml')
+    except FeatureNotFound:
+        # lxml is a separate, optional package; fall back to the parser
+        # bundled with Python instead of failing when it is not installed.
+        soup = BeautifulSoup(html, 'html.parser')
+
+    def input_value(name: str):
+        # `value` of the <input> with the given name, or None if absent.
+        element = soup.find('input', {'name': name})
+        return None if element is None else element.get('value')
+
+    authenticity_token = input_value('authenticity_token')
+    assignment_token = input_value('assignment_token')
+
+    needs_unlock = bool(soup.find('input', id='verification_string'))
+    start_button = bool(soup.find('input', value='Start'))
+    finish_button = bool(soup.find('input', value='Continue to X'))
+    delete_button = bool(soup.find('input', value='Delete'))
+
+    # The blob is whatever follows "data=" in the Arkose iframe URL. A
+    # missing iframe, a missing `src`, or a `src` without "data=" all
+    # yield None rather than raising.
+    blob = None
+    iframe = soup.find(id='arkose_iframe')
+    if iframe is not None:
+        match = re.search(r'data=(.+)', iframe.get('src') or '')
+        if match is not None:
+            blob = match.group(1)
+
+    return UnlockHTML(
+        authenticity_token=authenticity_token,
+        assignment_token=assignment_token,
+        needs_unlock=needs_unlock,
+        start_button=start_button,
+        finish_button=finish_button,
+        delete_button=delete_button,
+        blob=blob
+    )
diff --git a/twikit/_captcha/capsolver.py b/twikit/_captcha/capsolver.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+from time import sleep
+
+import httpx
+
+from .base import CaptchaSolver
+
+
+class Capsolver(CaptchaSolver):
+    """
+    You can automatically unlock the account by passing the `captcha_solver`
+    argument when initialising the :class:`.Client`.
+
+    First, visit https://capsolver.com and obtain your Capsolver API key.
+    Next, pass the Capsolver instance to the client as shown in the example.
+
+    .. code-block:: python
+
+        from twikit import Capsolver, Client
+        solver = Capsolver(
+            api_key='your_api_key',
+            max_attempts=10
+        )
+        client = Client(captcha_solver=solver)
+
+    Parameters
+    ----------
+    api_key : :class:`str`
+        Capsolver API key.
+    max_attempts : :class:`int`, default=3
+        The maximum number of attempts to solve the captcha.
+    get_result_interval : :class:`float`, default=1.0
+        Seconds to sleep before each poll of the task result.
+    use_blob_data : :class:`bool`, default=False
+        If True, the blob taken from the unlock page is sent as the task's
+        ``data`` field, together with the client's user agent.
+    """
+    def __init__(
+        self,
+        api_key: str,
+        max_attempts: int = 3,
+        get_result_interval: float = 1.0,
+        use_blob_data: bool = False
+    ) -> None:
+        self.api_key = api_key
+        self.get_result_interval = get_result_interval
+        self.max_attempts = max_attempts
+        self.use_blob_data = use_blob_data
+
+    def _post(self, endpoint: str, payload: dict) -> dict:
+        """POST ``payload`` as JSON to a Capsolver endpoint; return the
+        decoded JSON body."""
+        return httpx.post(
+            f'https://api.capsolver.com/{endpoint}',
+            json=payload,
+            headers={'content-type': 'application/json'}
+        ).json()
+
+    def create_task(self, task_data: dict) -> dict:
+        """
+        Submit a task to Capsolver.
+
+        Parameters
+        ----------
+        task_data : :class:`dict`
+            Sent as the ``task`` field of the ``createTask`` request.
+
+        Returns
+        -------
+        :class:`dict`
+            Decoded JSON response.
+        """
+        return self._post('createTask', {
+            'clientKey': self.api_key,
+            'task': task_data
+        })
+
+    def get_task_result(self, task_id: str) -> dict:
+        """
+        Fetch the current state of a task.
+
+        Parameters
+        ----------
+        task_id : :class:`str`
+            Sent as the ``taskId`` field of the ``getTaskResult`` request.
+
+        Returns
+        -------
+        :class:`dict`
+            Decoded JSON response.
+        """
+        return self._post('getTaskResult', {
+            'clientKey': self.api_key,
+            'taskId': task_id
+        })
+
+    def solve_funcaptcha(self, blob: str) -> dict:
+        """
+        Create a FunCaptcha task and poll until it is finished.
+
+        Parameters
+        ----------
+        blob : :class:`str`
+            Blob from the unlock page; only sent when ``use_blob_data``
+            is True.
+
+        Returns
+        -------
+        :class:`dict`
+            The first ``getTaskResult`` response whose ``status`` is
+            ``'ready'`` or ``'failed'``. If Capsolver answers with an error
+            payload instead (no ``taskId`` from ``createTask``, or a truthy
+            ``errorId`` while polling), that payload is returned as-is so
+            the caller can inspect it.
+        """
+        if self.client.proxy is None:
+            captcha_type = 'FunCaptchaTaskProxyLess'
+        else:
+            # NOTE(review): no proxy details are added to the task below;
+            # confirm whether 'FunCaptchaTask' needs them.
+            captcha_type = 'FunCaptchaTask'
+
+        task_data = {
+            'type': captcha_type,
+            'websiteURL': 'https://iframe.arkoselabs.com',
+            'websitePublicKey': self.CAPTCHA_SITE_KEY,
+            'funcaptchaApiJSSubdomain': 'https://client-api.arkoselabs.com',
+        }
+        if self.use_blob_data:
+            task_data['data'] = '{"blob":"%s"}' % blob
+            task_data['userAgent'] = self.client._user_agent
+
+        task = self.create_task(task_data)
+        task_id = task.get('taskId')
+        if task_id is None:
+            # Task creation was rejected: there is nothing to poll, so hand
+            # the response back instead of failing on a missing key.
+            return task
+
+        # NOTE: polling has no upper bound; it ends only on a terminal
+        # status or an error payload.
+        while True:
+            sleep(self.get_result_interval)
+            result = self.get_task_result(task_id)
+            finished = result.get('status') in ('ready', 'failed')
+            if finished or result.get('errorId'):
+                return result
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .base import CaptchaSolver
		from .capsolver import Capsolver