From 653abc912daa90b344ccd178c26e1306bc831548 Mon Sep 17 00:00:00 2001 From: John Kurkowski Date: Sat, 24 Oct 2020 21:04:12 -0700 Subject: [PATCH] Catch permission error when making cache dir, as well as cache file (#211) Stanches #209. * Move makedir side-effect from string building helper function into try block * Duplicate warning from writing a single cache file * Log this at most once per app * Remove unused vars --- tests/conftest.py | 4 +++ tests/main_test.py | 27 ++++++++++++++++++++ tldextract/cache.py | 53 ++++++++++++++++++++++++++++++---------- tldextract/tldextract.py | 2 -- 4 files changed, 71 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f3d93c84..7cb68869 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,10 +3,14 @@ import logging import pytest +import tldextract.cache @pytest.fixture(autouse=True) def reset_log_level(): """Automatically reset log level verbosity between tests. Generally want test output the Unix way: silence is golden.""" + tldextract.cache._DID_LOG_UNABLE_TO_CACHE = ( # pylint: disable=protected-access + False + ) logging.getLogger().setLevel(logging.WARN) diff --git a/tests/main_test.py b/tests/main_test.py index 96c55339..c6a0c6dd 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- '''Main tldextract unit tests.''' +import logging +import os import tempfile import pytest @@ -234,6 +236,31 @@ def test_result_as_dict(): assert result._asdict() == expected_dict +def test_cache_permission(mocker, monkeypatch, tmpdir): + """Emit a warning once that this can't cache the latest PSL.""" + + warning = mocker.patch.object(logging.getLogger("tldextract.cache"), "warning") + + def no_permission_makedirs(*args, **kwargs): + raise PermissionError( + """[Errno 13] Permission denied: + '/usr/local/lib/python3.7/site-packages/tldextract/.suffix_cache""" + ) + + monkeypatch.setattr(os, "makedirs", no_permission_makedirs) + + for _ in range(0, 2): + my_extract = tldextract.TLDExtract(cache_dir=tmpdir) + assert_extract( + "http://www.google.com", + ("www.google.com", "www", "google", "com"), + funs=(my_extract,), + ) + + assert warning.call_count == 1 + assert warning.call_args[0][0].startswith("unable to cache") + + @responses.activate def test_cache_timeouts(tmpdir): server = 'http://some-server.com' diff --git a/tldextract/cache.py b/tldextract/cache.py index ff013a44..e1e95aff 100644 --- a/tldextract/cache.py +++ b/tldextract/cache.py @@ -10,6 +10,8 @@ LOG = logging.getLogger(__name__) +_DID_LOG_UNABLE_TO_CACHE = False + class DiskCache: """Disk _cache that only works for jsonable values""" @@ -46,21 +48,26 @@ def set(self, namespace, key, value): cache_filepath = self._key_to_cachefile_path(namespace, key) try: + _make_dir(cache_filepath) with open(cache_filepath, "w") as cache_file: json.dump(value, cache_file) except OSError as ioe: - LOG.warning( - ( - "unable to cache %s.%s in %s. This could refresh the " - "Public Suffix List over HTTP every app startup. " - "Construct your `TLDExtract` with a writable `cache_dir` or " - "set `cache_dir=False` to silence this warning. %s" - ), - namespace, - key, - cache_filepath, - ioe, - ) + global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement + if not _DID_LOG_UNABLE_TO_CACHE: + LOG.warning( + ( + "unable to cache %s.%s in %s. This could refresh the " + "Public Suffix List over HTTP every app startup. " + "Construct your `TLDExtract` with a writable `cache_dir` or " + "set `cache_dir=False` to silence this warning. %s" + ), + namespace, + key, + cache_filepath, + ioe, + ) + _DID_LOG_UNABLE_TO_CACHE = True + return None def clear(self): @@ -86,7 +93,6 @@ def _key_to_cachefile_path(self, namespace, key): cache_path = os.path.join(namespace_path, hashed_key + self.file_ext) - _make_dir(cache_path) return cache_path def run_and_cache(self, func, namespace, kwargs, hashed_argnames): @@ -97,6 +103,27 @@ def run_and_cache(self, func, namespace, kwargs, hashed_argnames): key_args = {k: v for k, v in kwargs.items() if k in hashed_argnames} cache_filepath = self._key_to_cachefile_path(namespace, key_args) lock_path = cache_filepath + ".lock" + try: + _make_dir(cache_filepath) + except OSError as ioe: + global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement + if not _DID_LOG_UNABLE_TO_CACHE: + LOG.warning( + ( + "unable to cache %s.%s in %s. This could refresh the " + "Public Suffix List over HTTP every app startup. " + "Construct your `TLDExtract` with a writable `cache_dir` or " + "set `cache_dir=False` to silence this warning. %s" + ), + namespace, + key_args, + cache_filepath, + ioe, + ) + _DID_LOG_UNABLE_TO_CACHE = True + + return func(**kwargs) + with FileLock(lock_path, timeout=self.lock_timeout): try: result = self.get(namespace=namespace, key=key_args) diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index dcf12cd2..04744d1c 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -71,8 +71,6 @@ "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat", ) -CACHE = DiskCache(cache_dir=CACHE_DIR) - class ExtractResult(collections.namedtuple("ExtractResult", "subdomain domain suffix")): """namedtuple of a URL's subdomain, domain, and suffix."""