Skip to content

Commit

Permalink
Feature: add http caching mechanism (#309)
Browse files Browse the repository at this point in the history
This PR aims to improve the plugin's performance for the HTTP requests
that are performed to retrieve the length of remote images:

- add a new option `cache_dir`
- rely on https://pypi.org/project/CacheControl/ to manage the local cache
of HTTP requests

For now, it works only for GET requests. See upstream issue:
psf/cachecontrol#337
  • Loading branch information
Guts authored Jun 25, 2024
2 parents 96c0404 + 96fec9f commit 97ce5dd
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 9 deletions.
22 changes: 22 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,28 @@ Default: `<!-- more -->`

----

### :material-recycle: `cache_dir`: folder in which to store the plugin's cached files { #cache_dir }

The plugin implements a caching mechanism, ensuring that a remote media file is fetched only once during its life cycle on the remote HTTP server (using [Cache Control](https://pypi.org/project/CacheControl/) under the hood). It is normally not necessary to specify this setting, unless you want to change the path within your root directory where HTTP bodies and metadata files are cached.

If you want to change it, use:

``` yaml
plugins:
  - rss:
      cache_dir: my/custom/dir
```

It's strongly recommended to add the path to your `.gitignore` file in the root of your project:

``` title=".gitignore"
.cache
```

Default: `.cache/plugins/rss`.

----

### :material-tag-multiple: `categories`: item categories { #categories }

`categories`: list of page metadata values to use as [RSS item categories](https://www.w3schools.com/xml/rss_tag_category_item.asp).
Expand Down
4 changes: 4 additions & 0 deletions mkdocs_rss_plugin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from mkdocs.config import config_options
from mkdocs.config.base import Config

# package
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER

# ############################################################################
# ########## Classes ###############
# ##################################
Expand Down Expand Up @@ -42,6 +45,7 @@ class RssPluginConfig(Config):
categories = config_options.Optional(
config_options.ListOfItems(config_options.Type(str))
)
cache_dir = config_options.Type(str, default=f"{DEFAULT_CACHE_FOLDER.resolve()}")
comments_path = config_options.Optional(config_options.Type(str))
date_from_meta = config_options.SubConfig(_DateFromMeta)
enabled = config_options.Type(bool, default=True)
Expand Down
1 change: 1 addition & 0 deletions mkdocs_rss_plugin/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# ########## Globals #############
# ################################

# Default cache folder, expressed as a relative path (it is resolved against
# the current working directory when `.resolve()` is called on it).
DEFAULT_CACHE_FOLDER = Path(".cache/plugins/rss")
# Folder named "templates" located next to this module.
DEFAULT_TEMPLATE_FOLDER = Path(__file__).parent / "templates"
# Default template file, located inside the templates folder above.
DEFAULT_TEMPLATE_FILENAME = DEFAULT_TEMPLATE_FOLDER / "rss.xml.jinja2"
# Logger name string; presumably used as a prefix in MkDocs logs - confirm against callers.
MKDOCS_LOGGER_NAME = "[RSS-plugin]"
Expand Down
10 changes: 6 additions & 4 deletions mkdocs_rss_plugin/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
self.config.enabled = False
return config

# cache dir
self.cache_dir = Path(self.config.cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
logger.debug(f"Caching HTTP requests to: {self.cache_dir.resolve()}")

# integrations - check if theme is Material and if social cards are enabled
self.integration_material_social_cards = IntegrationMaterialSocialCards(
mkdocs_config=config,
Expand All @@ -100,6 +105,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:

# instantiate plugin tooling
self.util = Util(
cache_dir=self.cache_dir,
use_git=self.config.use_git,
integration_material_social_cards=self.integration_material_social_cards,
)
Expand Down Expand Up @@ -169,10 +175,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
self.config.date_from_meta.default_time = datetime.strptime(
self.config.date_from_meta.default_time, "%H:%M"
)
print(
self.config.date_from_meta.default_time,
type(self.config.date_from_meta.default_time),
)
except (TypeError, ValueError) as err:
logger.warning(
"Config error: `date_from_meta.default_time` value "
Expand Down
18 changes: 15 additions & 3 deletions mkdocs_rss_plugin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
# 3rd party
import markdown
import urllib3
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import SeparateBodyFileCache
from git import (
GitCommandError,
GitCommandNotFound,
Expand All @@ -34,7 +36,11 @@
from requests.exceptions import ConnectionError, HTTPError

# package
from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS
from mkdocs_rss_plugin.constants import (
DEFAULT_CACHE_FOLDER,
MKDOCS_LOGGER_NAME,
REMOTE_REQUEST_HEADERS,
)
from mkdocs_rss_plugin.git_manager.ci import CiHandler
from mkdocs_rss_plugin.integrations.theme_material_social_plugin import (
IntegrationMaterialSocialCards,
Expand Down Expand Up @@ -67,6 +73,7 @@ class Util:
def __init__(
self,
path: str = ".",
cache_dir: Path = DEFAULT_CACHE_FOLDER,
use_git: bool = True,
integration_material_social_cards: Optional[
IntegrationMaterialSocialCards
Expand Down Expand Up @@ -122,8 +129,13 @@ def __init__(
self.social_cards = integration_material_social_cards

# http/s session
self.req_session = Session()
self.req_session.headers.update(REMOTE_REQUEST_HEADERS)
session = Session()
session.headers.update(REMOTE_REQUEST_HEADERS)
self.req_session = CacheControl(
sess=session,
cache=SeparateBodyFileCache(directory=cache_dir),
cacheable_methods=("GET", "HEAD"),
)

def build_url(
self, base_url: str, path: str, args_dict: Optional[dict] = None
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Common requirements
# -----------------------


cachecontrol[filecache] >=0.14,<1
GitPython>=3.1,<3.2
mkdocs>=1.5,<2
requests>=2.31,<3
Expand Down
45 changes: 45 additions & 0 deletions tests/dev/dev_cached_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import http.client
import logging
from pathlib import Path

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# Development helper: manually check that CacheControl serves repeated
# GET/HEAD requests from a local file cache. Running this script performs
# real network requests and writes cache files into CACHE_DIR.

# Folder (relative to the current working directory) used as the file cache.
CACHE_DIR = ".web_cache"
# Remote resources requested twice to observe the cache behavior.
PAGE_URL = "https://geotribu.fr"
IMAGE_URL = (
    "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
)

# Print the raw HTTP exchanges so that requests actually sent over the network
# are visible on the console.
http.client.HTTPConnection.debuglevel = 1
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
# Requests no longer vendors urllib3, so connection logs are emitted under the
# "urllib3" logger name and not under "requests.packages.urllib3".
req_log = logging.getLogger("urllib3")
req_log.setLevel(logging.DEBUG)
req_log.propagate = True


def report_cache_status(label, response):
    """Print whether CacheControl flagged ``response`` as served from cache.

    Args:
        label: short text identifying the request in the console output.
        response: response object returned by the cached session.
    """
    # `from_cache` is set by the CacheControl adapter; fall back to "unknown"
    # rather than failing if the attribute is missing.
    print(f"{label}: from_cache={getattr(response, 'from_cache', 'unknown')}")


sess = CacheControl(
    requests.Session(),
    cache=FileCache(CACHE_DIR),
    cacheable_methods=("HEAD", "GET"),
)


# get requests
resp = sess.get(PAGE_URL)
resp_img = sess.get(IMAGE_URL)

# try again, cache hit expected
resp = sess.get(PAGE_URL)
report_cache_status("GET page (2nd call)", resp)
resp_img = sess.get(IMAGE_URL)
report_cache_status("GET image (2nd call)", resp_img)

# head requests
resp_img = sess.head(IMAGE_URL)


# try again, cache hit expected
# NOTE(review): HEAD caching may not be effective depending on the CacheControl
# version (see upstream issue psf/cachecontrol#337) - confirm with the output.
resp_img = sess.head(IMAGE_URL)
report_cache_status("HEAD image (2nd call)", resp_img)

print(list(Path(CACHE_DIR).iterdir()))
6 changes: 5 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
# 3rd party
from mkdocs.config.base import Config

# plugin target
from mkdocs_rss_plugin.config import RssPluginConfig

# plugin target
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER
from mkdocs_rss_plugin.plugin import GitRssPlugin

# test suite
Expand Down Expand Up @@ -62,6 +64,7 @@ def test_plugin_config_defaults(self):
"abstract_chars_count": 160,
"abstract_delimiter": "<!-- more -->",
"categories": None,
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
"comments_path": None,
"date_from_meta": {
"as_creation": "git",
Expand Down Expand Up @@ -105,6 +108,7 @@ def test_plugin_config_image(self):
expected = {
"abstract_chars_count": 160,
"abstract_delimiter": "<!-- more -->",
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
"categories": None,
"comments_path": None,
"date_from_meta": {
Expand Down

0 comments on commit 97ce5dd

Please sign in to comment.