From 3bce8c5ee084b28634befeb8d5928d998dc45670 Mon Sep 17 00:00:00 2001
From: reluce
Date: Wed, 3 Jan 2024 19:14:40 +0100
Subject: [PATCH 1/3] remove unnecessary imports

---
 src/szurubooru_toolkit/scripts/__init__.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/szurubooru_toolkit/scripts/__init__.py b/src/szurubooru_toolkit/scripts/__init__.py
index 893141c7..e69de29b 100644
--- a/src/szurubooru_toolkit/scripts/__init__.py
+++ b/src/szurubooru_toolkit/scripts/__init__.py
@@ -1,10 +0,0 @@
-from .auto_tagger import main  # noqa F401
-from .create_relations import main  # noqa F401
-from .create_tags import main  # noqa F401
-from .delete_posts import main  # noqa F401
-from .import_from_booru import main  # noqa F401
-from .import_from_twitter import main  # noqa F401
-from .import_from_url import main  # noqa F401
-from .reset_posts import main  # noqa F401
-from .tag_posts import main  # noqa F401
-from .upload_media import main  # noqa F401

From 240d8074e84ee672b9926a5472552ae5b69db692 Mon Sep 17 00:00:00 2001
From: reluce
Date: Sat, 10 Feb 2024 11:55:27 +0100
Subject: [PATCH 2/3] remove deprecated dependencies

---
 src/szurubooru_toolkit/__init__.py            |   8 +-
 .../scripts/import_from_twitter.py            | 125 -----------
 src/szurubooru_toolkit/twitter.py             | 201 ------------------
 3 files changed, 1 insertion(+), 333 deletions(-)
 delete mode 100644 src/szurubooru_toolkit/scripts/import_from_twitter.py
 delete mode 100644 src/szurubooru_toolkit/twitter.py

diff --git a/src/szurubooru_toolkit/__init__.py b/src/szurubooru_toolkit/__init__.py
index c28acd71..bbe9127b 100644
--- a/src/szurubooru_toolkit/__init__.py
+++ b/src/szurubooru_toolkit/__init__.py
@@ -6,10 +6,8 @@
 from .config import Config
 from .danbooru import Danbooru  # noqa F401
 from .gelbooru import Gelbooru  # noqa F401
-from .pixiv import Pixiv
 from .szurubooru import Post  # noqa F401
 from .szurubooru import Szurubooru
-from .twitter import Twitter  # noqa F401
 from .utils import audit_rating  # noqa F401
 from .utils import collect_sources  # noqa F401
 from .utils import convert_rating  # noqa F401
@@ -37,11 +35,7 @@
 )
 
 config = Config()
-if (
-    config.auto_tagger['deepbooru_enabled']
-    or config.import_from_url['deepbooru_enabled']
-    or config.import_from_booru['deepbooru_enabled']
-):
+if config.auto_tagger['deepbooru_enabled'] or config.import_from_url['deepbooru_enabled'] or config.import_from_booru['deepbooru_enabled']:
     from .deepbooru import Deepbooru  # noqa F401
 
 setup_logger(config)
diff --git a/src/szurubooru_toolkit/scripts/import_from_twitter.py b/src/szurubooru_toolkit/scripts/import_from_twitter.py
deleted file mode 100644
index 3dfbc086..00000000
--- a/src/szurubooru_toolkit/scripts/import_from_twitter.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import argparse
-
-from loguru import logger
-from tqdm import tqdm
-from tweepy import errors
-
-from szurubooru_toolkit import Twitter
-from szurubooru_toolkit import config
-from szurubooru_toolkit import szuru
-from szurubooru_toolkit.scripts import upload_media
-from szurubooru_toolkit.utils import download_media
-from szurubooru_toolkit.utils import get_md5sum
-
-
-def parse_args() -> tuple:
-    """Parse the input args to the script import_from_twitter.py and set the object attributes accordingly."""
-
-    parser = argparse.ArgumentParser(
-        description='This script fetches media files from your Twitter likes, uploads and optionally tags them.',
-    )
-
-    parser.add_argument(
-        '--limit',
-        type=int,
-        default=25,
-        help='Limit the amount of Twitter posts returned (default: 25)',
-    )
-
-    parser.add_argument(
-        '--user-id',
-        type=int,
-        default=None,
-        help='Fetch likes from the specified user id.',
-    )
-
-    args = parser.parse_args()
-    limit = args.limit
-    user_id = args.user_id
-
-    return user_id, limit
-
-
-@logger.catch
-def main() -> None:
-    """Call respective functions to retrieve and upload posts based on user input."""
-
-    try:
-        user_id, limit = parse_args()
-
-        if not user_id:
-            if config.twitter['user_id'] != 'None':
-                user_id = config.twitter['user_id']
-            else:
-                logger.critical(
-                    'No user id specified! Pass --user-id to the script or configure the user_id in config.toml.',
-                )
-                exit()
-
-        if config.import_from_twitter['saucenao_enabled']:
-            config.auto_tagger['saucenao_enabled'] = True
-        else:
-            config.auto_tagger['saucenao_enabled'] = False
-
-        if config.import_from_twitter['deepbooru_enabled']:
-            config.auto_tagger['deepbooru_enabled'] = True
-        else:
-            config.auto_tagger['deepbooru_forced'] = False
-            config.auto_tagger['deepbooru_enabled'] = False
-
-        if not config.import_from_twitter['saucenao_enabled'] and not config.import_from_twitter['deepbooru_enabled']:
-            config.upload_media['auto_tag'] = False
-
-        twitter = Twitter(
-            config.twitter['consumer_key'],
-            config.twitter['consumer_secret'],
-            config.twitter['access_token'],
-            config.twitter['access_token_secret'],
-        )
-
-        try:
-            tweets = twitter.get_media_from_liked_tweets(user_id, limit)
-        except errors.Unauthorized:
-            logger.critical(
-                'You\'re unauthorized to retrieve the user\'s tweets! User profile is probably private. '
-                'Configure credentials in config.toml.',
-            )
-            exit()
-
-        logger.info(f'Found {len(tweets)} tweets with media attachments. Start importing...')
-
-        for tweet in tqdm(
-            tweets,
-            ncols=80,
-            position=0,
-            leave=False,
-            total=len(tweets),
-            disable=config.import_from_twitter['hide_progress'],
-        ):
-            files = []
-            for media in tweet[1]:
-                files.append(download_media(media['url']))
-
-            for index, file in enumerate(files):
-                # Check by md5 hash if file is already uploaded
-                md5 = get_md5sum(file)
-                result = szuru.get_posts(f'md5:{md5}')
-
-                try:
-                    next(result)
-                    logger.debug(f'Skipping tweet, already exists: {tweet}')
-                except StopIteration:
-                    logger.debug(f'Importing tweet: {tweet}')
-
-                    metadata = {'tags': ['tagme'], 'safety': 'unsafe', 'source': tweet[0]}
-                    upload_media.main(file, tweet[1][index]['file_ext'], metadata)
-
-        logger.success('Script finished importing!')
-    except KeyboardInterrupt:
-        print('')
-        logger.info('Received keyboard interrupt from user.')
-        exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/szurubooru_toolkit/twitter.py b/src/szurubooru_toolkit/twitter.py
deleted file mode 100644
index ef231bf1..00000000
--- a/src/szurubooru_toolkit/twitter.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import re
-from math import ceil
-
-import tweepy
-from loguru import logger
-
-
-class Twitter:
-    """Twitter API"""
-
-    def __init__(self, consumer_key: str, consumer_secret: str, access_token: str, access_token_secret: str) -> None:
-        """Initializes a Tweepy client object as `self.client` with user credentials.
-
-        We will use OAuth 1.0a authentication.
-
-        For more information, see https://developer.twitter.com/en/docs/authentication/oauth-1-0a.
-
-        Args:
-            consumer_key (str): See above link on how to generate one.
-            consumer_secret (str): See above link on how to generate one.
-            access_token (str): See above link on how to generate one.
-            access_token_secret (str): See above link on how to generate one.
-        """
-
-        self.client = tweepy.Client(
-            consumer_key=consumer_key,
-            consumer_secret=consumer_secret,
-            access_token=access_token,
-            access_token_secret=access_token_secret,
-        )
-
-    def get_media_from_liked_tweets(self, user_id: int, limit: int = 25) -> list:
-        """Retrieves media files from liked tweets from `user_id`.
-
-        Args:
-            user_id (int): The user_id which should be queried.
-            limit (int): Limit the amount of tweets returned (default: 25).
-
-        Returns:
-            list: A list which contains the tweet URL and the associated media file URLs as a tuple.
-        """
-
-        def set_media_refs(data: list, tweets: list) -> None:
-            """Appends a tuple of the Tweet url and their media ref links to a list.
-
-            Args:
-                data (list): The data list from the Twitter response.
-                tweets (list): List where the tuples will get appended to.
-            """
-            for tweet in data:
-                if tweet.attachments:
-                    tweet_url = Twitter.get_tweet_url(tweet.entities['urls'])
-                    media_refs = Twitter.get_media_refs(tweet.attachments['media_keys'], response.includes['media'])
-                    tweets.append(tuple((tweet_url, media_refs)))
-
-        if limit > 100:
-            total_posts_to_fetch = limit
-            limit = 100
-        else:
-            total_posts_to_fetch = None
-
-        response = self.client.get_liked_tweets(
-            user_id,
-            user_auth=True,
-            expansions=['attachments.media_keys'],
-            tweet_fields=['entities'],
-            media_fields=['url', 'variants'],
-            max_results=limit,
-        )
-
-        tweets = []
-        set_media_refs(response.data, tweets)
-
-        # If user limit is > 100, start pagination.
-        if total_posts_to_fetch:
-            try:
-                next_token = response.meta['next_token']
-            except KeyError:
-                next_token = False
-
-            total_pages = ceil(total_posts_to_fetch / 100)
-            page = 2  # We already retrieved page 1
-
-            while next_token and page <= total_pages:
-                # On the last page, retrieve only the last double digits posts from the limit.
-                # 1230 -> 30, 123 -> 23
-                # If last two digits are 0s, assume 100 and fetch the max amount of posts.
-                if page == total_pages:
-                    limit = int(str(total_posts_to_fetch)[-2:])
-                    if limit == 0:
-                        limit = 100
-
-                response = self.client.get_liked_tweets(
-                    user_id,
-                    user_auth=True,
-                    expansions=['attachments.media_keys'],
-                    tweet_fields=['entities'],
-                    media_fields=['url', 'variants'],
-                    max_results=limit,
-                    pagination_token=next_token,
-                )
-
-                try:
-                    next_token = response.meta['next_token']
-                except KeyError:  # In case we reached the last page
-                    next_token = False
-
-                page += 1
-                set_media_refs(response.data, tweets)
-
-        return tweets
-
-    @staticmethod
-    def get_tweet_url(entities_urls: dict) -> str:
-        """Extract and return the tweets URL.
-
-        Args:
-            entities_urls (dict): The URL entities from a Tweepy tweet object.
-
-        Returns:
-            str: The tweet's URL.
-        """
-
-        for entity in entities_urls:
-            if 'twitter.com' in entity['expanded_url']:
-                twitter_url = entity['url']
-
-        return twitter_url
-
-    @staticmethod
-    def get_media_refs(media_keys: list, media_list: list) -> list:
-        """Match the tweets media attachments to the tweet itself.
-
-        Since the media attachments are in a separete object from the tweet's data,
-        we have to piece those two together.
-
-        Args:
-            media_keys (list): A list of media_keys from the tweet.
-            media_list (list): A list of Tweepy media objects which contains the media_key for reference.
-
-        Returns:
-            list: A list with a dict which contains the media URL (up to 4096x4096 resolution)
-                and the file's extension.
-        """
-
-        media_refs = []
-        for media in media_list:
-            if media.media_key in media_keys:
-                if media.type in ['video', 'animated_gif']:
-                    video_url = Twitter.get_highest_quality_video(media.data['variants'])
-                    file_ext = Twitter.get_file_ext(video_url)
-                    media_refs.append({'url': video_url, 'file_ext': file_ext})
-                else:
-                    file_ext = Twitter.get_file_ext(media.url)
-                    media_refs.append({'url': media.url + '?name=4096x4096', 'file_ext': file_ext})
-
-        return media_refs
-
-    @staticmethod
-    def get_file_ext(url: str) -> str:
-        """Exctract and return the file extension.
-
-        Args:
-            url (str): The Twitter file URL.
-
-        Returns:
-            str: The file extension (without a dot).
-        """
-
-        try:
-            file_ext = re.findall(r'\.mp4|\.png|\.jpg|\.gif|\.webm', url)[0].replace('.', '')
-        except Exception as e:
-            file_ext = None
-            logger.debug(f'Could not extract file extension from "{url}": {e}')
-
-        return file_ext
-
-    @staticmethod
-    def get_highest_quality_video(variants: list) -> str:
-        """Return the highest quality video URL from a tweet.
-
-        Can be applied to `media_type` `animated_gif` as well.
-
-        Args:
-            variants (list): The variants list of the Tweepy tweet object.
-
-        Returns:
-            str: Video URL with the highest quality match.
-        """
-
-        bit_rates = []
-
-        for variant in variants:
-            if 'bit_rate' in variant:
-                bit_rates.append(variant['bit_rate'])
-
-        highest_bitrate = max(bit_rates)
-        for variant in variants:
-            if 'bit_rate' in variant and variant['bit_rate'] == highest_bitrate:
-                video_url = variant['url']
-
-        return video_url

From 5404b8ef255b32ce75a9a3fe56f49929568c7f58 Mon Sep 17 00:00:00 2001
From: reluce
Date: Sat, 10 Feb 2024 11:55:56 +0100
Subject: [PATCH 3/3] bump version to 0.9.6

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 32de1231..2c119f19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ force_single_line = true
 
 [tool.poetry]
 name = "szurubooru-toolkit"
-version = "0.9.5"
+version = "0.9.6"
 description = "Python package and script collection to manage szurubooru."
 authors = ["reluce "]
 license = "GPL-3.0-only"
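
For anyone vendoring the removed Twitter code before this series lands: the
subtlest part of the deleted get_media_from_liked_tweets is its pagination
math. The API caps max_results at 100 per request, so the method issued full
pages of 100 and trimmed the final request to the last two decimal digits of
the requested total (1230 -> 30, 123 -> 23), treating a trailing 00 as a full
page. Below is a minimal, self-contained sketch of that per-page computation,
assuming the API's fixed page size of 100; the helper name page_limits is
illustrative only and never existed in the toolkit.

    # Sketch only -- page_limits is a hypothetical helper, not toolkit API.
    from math import ceil

    PAGE_SIZE = 100  # the API's max_results ceiling that forces pagination


    def page_limits(total: int) -> list:
        """Per-request limits mirroring the removed logic: full pages of 100,
        then a final page sized to the last two digits of `total`, with a
        trailing 00 treated as a full page."""
        if total <= PAGE_SIZE:
            return [total]

        total_pages = ceil(total / PAGE_SIZE)
        last = int(str(total)[-2:])  # e.g. 1230 -> 30

        return [PAGE_SIZE] * (total_pages - 1) + [last if last else PAGE_SIZE]


    assert page_limits(25) == [25]
    assert page_limits(123) == [100, 23]
    assert page_limits(200) == [100, 100]
    assert sum(page_limits(1230)) == 1230

Note that the string slice is just total % 100 in disguise, which is why the
original logic only behaved correctly for the API's fixed page size of 100.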