diff --git a/find_posts.py b/find_posts.py
index 408f0f5b..9851820d 100644
--- a/find_posts.py
+++ b/find_posts.py
@@ -16,7 +16,7 @@
 import defusedxml.ElementTree as ET
 import urllib.robotparser
 from urllib.parse import urlparse
-import hashlib
+import xxhash
 
 logger = logging.getLogger("FediFetcher")
 robotParser = urllib.robotparser.RobotFileParser()
@@ -1076,7 +1076,7 @@ def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
     return result
 
 def get_robots_txt_cache_path(robots_url):
-    hash = hashlib.sha256(robots_url.encode('utf-8'))
+    hash = xxhash.xxh128(robots_url.encode('utf-8'))
     return os.path.join(arguments.state_dir, f'robots-{hash.hexdigest()}.txt')
 
 def get_cached_robots(robots_url):
diff --git a/requirements.txt b/requirements.txt
index dbfd30b5..a3fb88d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,3 +12,4 @@ requests==2.32.0
 six==1.16.0
 smmap==5.0.0
 urllib3==1.26.19
+xxhash==3.4.1
\ No newline at end of file