Initial commit

bashonly · Mar 16, 2023 · dd577ca · dd577ca
commit dd577ca
Show file tree

Hide file tree

Showing 6 changed files with 239 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+*.egg-info/
+*.pyc
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/README.md b/README.md
@@ -0,0 +1,16 @@
+A [yt-dlp](https://github.com/yt-dlp/yt-dlp) extractor [plugin](https://github.com/yt-dlp/yt-dlp#plugins) for downloading all videos from a TikTok user
+
+---
+
+Based on [redraskal's TikTokUserIE fork](https://github.com/redraskal/yt-dlp/tree/fix/tiktok-user)
+
+## Installation
+
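+Requires yt-dlp `2023.02.17` or above, plus [Playwright for Python](https://playwright.dev/python/) with a Firefox build it can launch (the extractor fetches the video list through a Playwright-driven Firefox page).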
+
+You can install this package with pip:
+```
+python3 -m pip install -U https://github.com/bashonly/yt-dlp-TTUser/archive/master.zip
+```
+
+See [yt-dlp installing plugins](https://github.com/yt-dlp/yt-dlp#installing-plugins) for the many other ways this plugin package can be installed.
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.distutils.bdist_wheel]
+universal = true
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,11 @@
+[metadata]
+name = yt-dlp-TTUser
+version = 2023.03.16
+
+[options]
+packages = find_namespace:
+
+[flake8]
+exclude = build,venv,.tox,.git,.pytest_cache
+ignore = E402,E501,E731,E741,W503
+max_line_length = 120
diff --git a/yt_dlp_plugins/extractor/tt_user.py b/yt_dlp_plugins/extractor/tt_user.py
@@ -0,0 +1,179 @@
+import base64
+import itertools
+import random
+import string
+import urllib.parse
+
+from yt_dlp.aes import aes_cbc_encrypt_bytes
+from yt_dlp.utils import (
+    ExtractorError,
+    traverse_obj,
+    try_call,
+    update_url_query,
+)
+from yt_dlp.extractor.tiktok import TikTokIE, TikTokUserIE
+
+
+class TikTokUser_TTUserIE(TikTokUserIE, plugin_name='TTUser'):
+    IE_NAME = 'tiktok:user'
+    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
+    _WORKING = True
+    _TESTS = [{
+        'url': 'https://tiktok.com/@therock?lang=en',
+        'playlist_mincount': 25,
+        'info_dict': {
+            'id': 'therock',
+        },
+    }]
+
+    _API_BASE_URL = 'https://us.tiktok.com/api/post/item_list/'
+    _PARAMS = {
+        'aid': '1988',
+        'app_language': 'en',  # url only?
+        'app_name': 'tiktok_web',
+        'browser_language': 'en-US',
+        'browser_name': 'Mozilla',
+        'browser_online': 'true',
+        'browser_platform': 'Win32',
+        'browser_version': '5.0 (Windows)',
+        'channel': 'tiktok_web',
+        'cookie_enabled': 'true',
+        'device_id': ''.join(random.choices(string.digits, k=19)),
+        'device_platform': 'web_pc',
+        'focus_state': 'false',
+        'from_page': 'user',
+        'history_len': '2',
+        'is_encryption': '1',
+        'is_fullscreen': 'false',
+        'is_page_visible': 'true',
+        'os': 'windows',
+        'region': 'US',
+        'screen_height': '1080',
+        'screen_width': '1920',
+        'tz_name': 'UTC',  # x-tt-params only?
+        'webcast_language': 'en',  # x-tt-params only?
+    }
+    _PARAMS_AES_KEY = b'webapp1.0+202106'
+
+    def _x_tt_params(self, sec_uid, cursor):
+        query = self._PARAMS.copy()
+        # query.pop('app_language', None)
+        query.update({
+            'cursor': cursor,
+            'language': 'en',
+            'priority_region': '',
+            'referer': '',
+            'root_referer': 'undefined',
+            'secUid': sec_uid,
+            'userId': 'undefined',
+            'verifyFp': 'undefined',
+        })
+        return base64.b64encode(aes_cbc_encrypt_bytes(
+            urllib.parse.urlencode(dict(sorted(query.items()))),
+            self._PARAMS_AES_KEY, self._PARAMS_AES_KEY)).decode()
+
+    def _entries(self, sec_uid, user_name):
+        try:
+            from playwright.sync_api import sync_playwright
+        except ImportError:
+            raise ExtractorError('Playwright is not installed', expected=True)
+
+        api_url = update_url_query(self._API_BASE_URL, self._PARAMS)
+        cursor = '0'
+
+        with sync_playwright() as p:
+            browser = p.firefox.launch(
+                args=['--mute-audio'], executablePath=self._configuration_arg('firefox_path', [None], ie_key=TikTokIE)[0])
+            webpage = browser.new_page()
+            webpage.goto('https://www.tiktok.com/', wait_until='load')
+            webpage.wait_for_timeout(2000)
+
+            for page in itertools.count(1):
+                self.to_screen(f'Downloading page {page}')
+                res = webpage.evaluate(
+                    '([api_url, params]) => fetch(api_url, { headers: { "x-tt-params": params } }).then(res => res.json())',
+                    [api_url, self._x_tt_params(sec_uid, cursor)])
+
+                for video in traverse_obj(res, ('itemList', ..., {dict})):
+                    video_id = video.get('id')
+                    if video_id:
+                        yield self.url_result(self._create_url(user_name, video_id), TikTokIE, video_id)
+                        # entry = {}
+                        # try:
+                        #     entry = self._extract_aweme_app(video_id)
+                        # except ExtractorError:
+                        #     self.report_warning('Failed to extract from feed; falling back to web API response')
+                        #     aweme_detail = traverse_obj(
+                        #         res, ('itemList', lambda _, v: v['id'] == video_id, {dict}), get_all=False)
+                        #     if traverse_obj(aweme_detail, ('video', 'playAddr')):
+                        #         entry = self._parse_aweme_video_web(
+                        #             aweme_detail, self._create_url(user_name, video_id))
+                        #
+                        # if entry:
+                        #     yield {
+                        #         **entry,
+                        #         'extractor_key': TikTokIE.ie_key(),
+                        #         'extractor': 'TikTok',
+                        #         'webpage_url': self._create_url(user_name, video_id),
+                        #     }
+                        # else:
+                        #     self.report_warning(f'Unable to extract video {video_id}')
+
+                if not res.get('hasMore') or not res.get('cursor'):
+                    break
+                cursor = res['cursor']
+
+            browser.close()
+
+    def _parse_aweme_video_app(self, aweme_detail):
+        ret = super()._parse_aweme_video_app(aweme_detail)
+        ret['channel_id'] = traverse_obj(aweme_detail, ('author', 'sec_uid'))
+        return ret
+
+    def _get_sec_uid(self, user_url, user_name, msg):
+        webpage = self._download_webpage(
+            user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
+            note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage')
+        data = try_call(lambda: self._get_sigi_state(webpage, user_name))
+        return traverse_obj(
+            data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid'),
+            ('UserModule', 'users', ..., 'secUid'), get_all=False, expected_type=str)
+
+    def _real_extract(self, url):
+        user_name = self._match_id(url)
+        sec_uid = self._configuration_arg('sec_uid', [None], ie_key=TikTokIE)[0]
+
+        if not sec_uid:
+            for user_url, msg in (
+                (self._UPLOADER_URL_FORMAT % user_name, 'user'),
+                (self._UPLOADER_URL_FORMAT % f'{user_name}/live', 'live'),
+            ):
+                sec_uid = self._get_sec_uid(user_url, user_name, msg)
+                if sec_uid:
+                    break
+
+        if not sec_uid:
+            webpage = self._download_webpage(
+                f'https://www.tiktok.com/embed/@{user_name}', user_name, note='Downloading user embed page')
+            data = traverse_obj(self._search_json(
+                r'(?s)<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', webpage, 'data', user_name),
+                ('source', 'data', f'/embed/@{user_name}', {dict}))
+
+            info = {}
+            for aweme_id in traverse_obj(data, ('videoList', ..., 'id')):
+                try:
+                    info = self._extract_aweme_app(aweme_id)
+                except ExtractorError:
+                    continue
+
+                sec_uid = info.get('channel_id')
+                if sec_uid:
+                    break
+
+            if not sec_uid:
+                raise ExtractorError('Could not extract secondary user ID')
+
+        return self.playlist_result(self._entries(sec_uid, user_name), user_name)
+
+
+__all__ = []  # a star-import of this module exports no names