From f4873e7c8ea8092c18fc9d5c61e2178cbecb8e01 Mon Sep 17 00:00:00 2001 From: nanos Date: Tue, 2 Jul 2024 07:34:15 +0100 Subject: [PATCH] backfill mentioned users in list timelines --- find_posts.py | 53 +++++++++++++++++++++++++++------------------------ uniq | 14 ++++++++++++++ 2 files changed, 42 insertions(+), 25 deletions(-) create mode 100644 uniq diff --git a/find_posts.py b/find_posts.py index 491922d7..da97399f 100644 --- a/find_posts.py +++ b/find_posts.py @@ -1381,6 +1381,30 @@ def get_list_users(server, list, token, max): logger.info(f"Found {len(accounts)} accounts in list {list['title']}") return accounts +def fetch_timeline_context(timeline_posts, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users): + known_context_urls = get_all_known_context_urls(arguments.server, timeline_posts,parsed_urls, seen_hosts) + add_context_urls(arguments.server, token, known_context_urls, seen_urls) + + # Backfill any post authors, and any mentioned users + if arguments.backfill_mentioned_users > 0: + mentioned_users = [] + cut_off = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(minutes=60) + for toot in timeline_posts: + these_users = [] + toot_created_at = parser.parse(toot['created_at']) + if len(mentioned_users) < 10 or (toot_created_at > cut_off and len(mentioned_users) < 30): + these_users.append(toot['account']) + if(len(toot['mentions'])): + these_users += toot['mentions'] + if(toot['reblog'] != None): + these_users.append(toot['reblog']['account']) + if(len(toot['reblog']['mentions'])): + these_users += toot['reblog']['mentions'] + for user in these_users: + if user not in mentioned_users and user['acct'] not in all_known_users: + mentioned_users.append(user) + + add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts) if __name__ == "__main__": start = datetime.now() @@ -1551,12 +1575,12 @@ def get_list_users(server, 
list, token, max): if arguments.from_lists: """Pull replies from lists""" lists = get_user_lists(arguments.server, token) + logger.info(f"Getting context for {len(lists)} lists") for user_list in lists: # Fill context from list if arguments.max_list_length > 0: timeline_toots = get_list_timeline(arguments.server, user_list, token, arguments.max_list_length) - known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts) - add_context_urls(arguments.server, token, known_context_urls, seen_urls) + fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users) # Backfill profiles from list if arguments.max_list_accounts: @@ -1581,30 +1605,9 @@ def get_list_users(server, list, token, max): if arguments.home_timeline_length > 0: """Do the same with any toots on the key owner's home timeline """ + logger.info(f"Getting context for home timeline") timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length) - known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts) - add_context_urls(arguments.server, token, known_context_urls, seen_urls) - - # Backfill any post authors, and any mentioned users - if arguments.backfill_mentioned_users > 0: - mentioned_users = [] - cut_off = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(minutes=60) - for toot in timeline_toots: - these_users = [] - toot_created_at = parser.parse(toot['created_at']) - if len(mentioned_users) < 10 or (toot_created_at > cut_off and len(mentioned_users) < 30): - these_users.append(toot['account']) - if(len(toot['mentions'])): - these_users += toot['mentions'] - if(toot['reblog'] != None): - these_users.append(toot['reblog']['account']) - if(len(toot['reblog']['mentions'])): - these_users += toot['reblog']['mentions'] - for user in these_users: - if user not in mentioned_users and user['acct'] not in all_known_users: - 
mentioned_users.append(user) - - add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts) + fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users) if arguments.max_followings > 0: logger.info(f"Getting posts from last {arguments.max_followings} followings") diff --git a/uniq b/uniq new file mode 100644 index 00000000..dbb5fd34 --- /dev/null +++ b/uniq @@ -0,0 +1,14 @@ +Error getting context for toot https://bsd.network/@lattera/112695266248144937. Exception: Querying https://bsd.network/api/v1/statuses/112695266248144937/context prohibited by robots.txt +Error getting context for toot https://glitch.social/@wilbr/112708074029292084. Exception: Querying https://glitch.social/api/v1/statuses/112708074029292084/context prohibited by robots.txt +Error getting context for toot https://mastodon.bida.im/@redhotcyber/112693534053156334. Exception: Querying https://mastodon.bida.im/api/v1/statuses/112693534053156334/context prohibited by robots.txt +Error getting context for toot https://toot.cafe/@aardrian/112695640079712832. Exception: Querying https://toot.cafe/api/v1/statuses/112695640079712832/context prohibited by robots.txt +Error getting host node info for flipboard.com. Exception: Querying https://flipboard.com/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for fsebugoutzone.org. Exception: Querying https://fsebugoutzone.org/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for glitterkitten.co.uk. Exception: Querying https://glitterkitten.co.uk/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for kitty.town. Exception: Querying https://kitty.town/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for mastodon.bentasker.co.uk. 
Exception: Querying https://mastodon.bentasker.co.uk/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for puddle.town. Exception: Querying https://puddle.town/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for shitposter.world. Exception: Querying https://shitposter.world/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for threads.net. Exception: Querying https://threads.net/.well-known/nodeinfo prohibited by robots.txt +Error getting host node info for www.threads.net. Exception: Querying https://www.threads.net/.well-known/nodeinfo prohibited by robots.txt +Error getting user ID for user aardrian: Querying https://toot.cafe/api/v1/accounts/lookup?acct=aardrian prohibited by robots.txt