From 22d2f898ff1175435908d10976bc0e93253997f5 Mon Sep 17 00:00:00 2001 From: Thomas Perrot Date: Tue, 13 Mar 2018 11:08:48 +0100 Subject: [PATCH] Fixed concurrent access to cache file when using tldextract in multiple threads --- pylintrc | 2 +- tldextract/tldextract.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pylintrc b/pylintrc index 2a1de125..56cade43 100644 --- a/pylintrc +++ b/pylintrc @@ -358,7 +358,7 @@ max-statements=50 max-parents=7 # Maximum number of attributes for a class (see R0902). -max-attributes=7 +max-attributes=8 # Minimum number of public methods for a class (see R0903). min-public-methods=1 diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index 11eb1d8b..1cb68cb3 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -58,6 +58,7 @@ import logging import os import re +import threading import idna @@ -208,6 +209,7 @@ def __init__(self, cache_file=CACHE_FILE, suffix_list_urls=PUBLIC_SUFFIX_LIST_UR self.suffix_list_urls = tuple(url.strip() for url in suffix_list_urls if url.strip()) self.cache_file = os.path.expanduser(cache_file or '') + self.cache_file_lock = threading.Lock() self.fallback_to_snapshot = fallback_to_snapshot if not (self.suffix_list_urls or self.cache_file or self.fallback_to_snapshot): raise ValueError("The arguments you have provided disable all ways for tldextract " @@ -286,8 +288,8 @@ def _get_tld_extractor(self): 4. Bundled PSL snapshot file''' if self._extractor: return self._extractor - - tlds = self._get_cached_tlds() + with self.cache_file_lock: + tlds = self._get_cached_tlds() if tlds: tlds.extend(self.extra_suffixes) self._extractor = _PublicSuffixListTLDExtractor(tlds) @@ -311,7 +313,8 @@ def _get_tld_extractor(self): raise Exception("tlds is empty, but fallback_to_snapshot is set" " to false. Cannot proceed without tlds.") - self._cache_tlds(tlds) + with self.cache_file_lock: + self._cache_tlds(tlds) tlds.extend(self.extra_suffixes) self._extractor = _PublicSuffixListTLDExtractor(tlds)