Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add yaml header check #184

Merged
merged 28 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
53969f2
Add yaml header check wip
gounux Mar 17, 2024
a43e560
Add tags check using JSON feed
gounux Mar 18, 2024
36dcb42
Add some tests
gounux Mar 23, 2024
bef103d
Add mandatory keys check
gounux Mar 23, 2024
1a57ebb
Check tags alphabetical order
gounux Apr 1, 2024
274e435
Remove publish date check
gounux Apr 17, 2024
892cc5c
Use utils function for checking if file exists
gounux Apr 26, 2024
b9301c7
Merge branch 'main' into feature/add-markdown-header-checker
Guts May 2, 2024
26c12ab
Merge branch 'main' into feature/add-markdown-header-checker
gounux May 3, 2024
45e7764
Use JSON feed client
gounux May 3, 2024
a2c0d57
Use frontmatter to load yaml metadata
gounux May 17, 2024
536e82a
Accept multiple path parameters
gounux May 17, 2024
7224615
Add missing dependency
gounux May 17, 2024
91176fd
Edit how image sizes are checked
gounux May 20, 2024
d45db58
Remove icon and subtitle from mandatory keys
gounux May 20, 2024
3633902
Check author markdown file
gounux May 23, 2024
f983c64
Fix author md check
gounux May 23, 2024
2a0cc93
Add image size by url function
gounux May 23, 2024
14c8a32
Add author md check tests
gounux May 23, 2024
ae13e58
Merge branch 'main' into feature/add-markdown-header-checker
gounux May 23, 2024
f4f5c63
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
275fc9d
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
2659b51
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2024
856213d
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
77b838c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2024
61e8f56
Add missing imports
gounux May 27, 2024
d4bbcbd
Use slugger for author md file
gounux May 28, 2024
6a43fcb
No more unidecode
gounux May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions geotribu_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
parser_comments_broadcast,
parser_comments_latest,
parser_comments_read,
parser_header_check,
parser_images_optimizer,
parser_latest_content,
parser_mastodon_export,
Expand Down Expand Up @@ -223,6 +224,16 @@ def main(args: list[str] = None):
add_common_arguments(subcmd_upgrade)
parser_upgrade(subcmd_upgrade)

subcmd_header_check = subparsers.add_parser(
"header-check",
aliases=["header", "check", "header-check", "metadata"],
help="Vérifier entête markdown",
formatter_class=main_parser.formatter_class,
prog="header-check",
)
add_common_arguments(subcmd_header_check)
parser_header_check(subcmd_header_check)

# -- NESTED SUBPARSER : CREATE ---------------------------------------------------
subcmd_content_manager = subparsers.add_parser(
"creer",
Expand Down
192 changes: 192 additions & 0 deletions geotribu_cli/content/header_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import argparse
import logging
import os
import shutil
import uuid

import requests
import yaml
from PIL import Image

from geotribu_cli.__about__ import __executable_name__, __version__
from geotribu_cli.constants import GeotribuDefaults
from geotribu_cli.content.json_feed import JsonFeedClient

logger = logging.getLogger(__name__)
defaults_settings = GeotribuDefaults()

# Keys expected in the YAML front matter of a content; used as the default
# value of the `mandatory` parameter of check_mandatory_keys().
MANDATORY_KEYS = [
    "title",
    "subtitle",
    "authors",
    "categories",
    "date",
    "description",
    "icon",
    "license",
    "tags",
]

# ############################################################################
# ########## CLI #################
# ################################


def parser_header_check(
    subparser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """Set the argument parser subcommand.

    Registers the path of the markdown file to check, the accepted bounds of
    the image width/height ratio and the ``--raise`` flag, then binds the
    subcommand to :func:`run`.

    Args:
        subparser (argparse.ArgumentParser): parser to set up

    Returns:
        argparse.ArgumentParser: parser ready to use
    """
    subparser.add_argument(
        "content_path",
        help="Chemin du fichier markdown dont l'entête est à vérifier",
        type=str,
        metavar="content",
    )
    subparser.add_argument(
        "-minr",
        "--min-ratio",
        dest="min_image_ratio",
        # without an explicit type, a value passed on the command line stays a
        # str and cannot be compared with the computed (float) image ratio
        type=float,
        default=1.2,
        help="Ratio width/height minimum de l'image à vérifier",
    )
    subparser.add_argument(
        "-maxr",
        "--max-ratio",
        dest="max_image_ratio",
        type=float,
        default=1.5,
        help="Ratio width/height maximum de l'image à vérifier",
    )
    subparser.add_argument(
        "-r",
        "--raise",
        dest="raise_exceptions",
        action="store_true",
        default=False,
        help="Lever des exceptions et donc arrêter le programme si des erreurs sont rencontrées",
    )
    subparser.set_defaults(func=run)
    return subparser


# ############################################################################
# ########## MAIN ################
# ################################


def check_image_ratio(image_url: str, min_ratio: float, max_ratio: float) -> bool:
    """Tell whether the width/height ratio of a remote image is within bounds.

    The image is downloaded and decoded from an in-memory buffer: nothing is
    written to the current working directory, so there is no temporary file to
    flush before reading or to clean up afterwards.

    Args:
        image_url (str): URL of the image to download
        min_ratio (float): lowest accepted width/height ratio (inclusive)
        max_ratio (float): highest accepted width/height ratio (inclusive)

    Returns:
        bool: True if min_ratio <= width / height <= max_ratio

    Raises:
        requests.HTTPError: if the server answers with an error status
    """
    # function-scope import so that the block does not rely on a new
    # module-level import
    from io import BytesIO

    r = requests.get(
        image_url,
        headers={"User-Agent": f"{__executable_name__}v{__version__}"},
        # do not hang forever on an unresponsive server
        timeout=30,
    )
    r.raise_for_status()
    # r.content is the fully downloaded (and content-decoded) body
    with Image.open(BytesIO(r.content)) as image:
        ratio = image.width / image.height
    return min_ratio <= ratio <= max_ratio
gounux marked this conversation as resolved.
Show resolved Hide resolved


def get_existing_tags() -> set[str]:
    """Fetch the tags listed in the JSON feed.

    Builds a ``JsonFeedClient`` with its default arguments and asks it for its
    tags, sorted.

    Returns:
        set[str]: tags returned by the JSON feed client

    NOTE(review): the annotation says ``set``, but ``get_tags`` is called with
    ``should_sort=True`` - confirm which container type callers should expect.
    """
    return JsonFeedClient().get_tags(should_sort=True)
gounux marked this conversation as resolved.
Show resolved Hide resolved


def check_existing_tags(tags: list[str]) -> tuple[bool, set[str], set[str]]:
    """Compare tags with the ones returned by get_existing_tags().

    Args:
        tags (list[str]): tags to look up

    Returns:
        tuple[bool, set[str], set[str]]: whether every tag is already known,
            the tags that are not known, the tags that are known
    """
    known = get_existing_tags()
    requested = set(tags)
    unknown = requested.difference(known)
    found = requested.intersection(known)
    # every requested tag is known exactly when none of them is left over
    return not unknown, unknown, found


def check_tags_order(tags: list[str]) -> bool:
    """Tell whether tags are listed in ascending order.

    Each tag is compared with its successor using the default string
    ordering, so the comparison is case-sensitive. Equal neighbours are
    accepted, and a list with fewer than two tags is always ordered.

    Args:
        tags (list[str]): tags to check

    Returns:
        bool: True if no tag is greater than the one that follows it
    """
    return all(current <= following for current, following in zip(tags, tags[1:]))


def check_mandatory_keys(
    keys: list[str], mandatory: list[str] = MANDATORY_KEYS
) -> tuple[bool, set[str]]:
    """Look for mandatory keys that are absent from a collection of keys.

    Args:
        keys (list[str]): keys that are actually present
        mandatory (list[str]): keys that must be present.
            Defaults to MANDATORY_KEYS.

    Returns:
        tuple[bool, set[str]]: whether no mandatory key is absent, and the set
            of absent mandatory keys
    """
    absent = {name for name in mandatory if name not in keys}
    return not absent, absent


def run(args: argparse.Namespace) -> None:
    """Run the sub command logic.

    Checks YAML header of a content. In order: ratio of the image (only when
    an ``image`` key is present), existence of the tags, alphabetical order
    of the tags, presence of the mandatory keys. Each failed check is logged
    as an error.

    Args:
        args (argparse.Namespace): arguments passed to the subcommand

    Raises:
        ValueError: if the file does not exist, if it has no front matter
            delimited by ``---``, or if a check fails while
            ``args.raise_exceptions`` is set
    """
    logger.debug(f"Running {args.command} with {args}")
    content_path = args.content_path

    if not os.path.exists(content_path):
        raise ValueError(f"Mayday ! Le fichier {content_path} n'existe pas !")

    # contents hold accented characters: do not rely on the platform encoding
    with open(content_path, encoding="utf-8") as file:
        content = file.read()

    # expected layout: "---", YAML header, "---", markdown body
    parts = content.split("---", 2)
    if len(parts) != 3:
        raise ValueError(
            f"Mayday ! Le fichier {content_path} ne contient pas d'entête YAML délimitée par '---' !"
        )
    # an empty header is loaded as None: fall back on an empty mapping so that
    # the checks below report the missing keys instead of crashing
    yaml_meta = yaml.safe_load(parts[1]) or {}
    logger.debug(f"YAML metadata loaded : {yaml_meta}")

    # a missing or empty "tags" key is reported by the mandatory keys check
    tags = yaml_meta.get("tags") or []

    # check that image ratio is okayyy
    if "image" in yaml_meta:
        if not check_image_ratio(
            yaml_meta["image"], args.min_image_ratio, args.max_image_ratio
        ):
            # use the attribute names declared by the parser (dest=...)
            msg = f"Le ratio de l'image n'est pas dans l'intervalle autorisé ({args.min_image_ratio} - {args.max_image_ratio})"
            logger.error(msg)
            if args.raise_exceptions:
                raise ValueError(msg)
        else:
            logger.info("Ratio image ok")

    # check that tags already exist
    all_exists, missing, _ = check_existing_tags(tags)
    if not all_exists:
        msg = f"Les tags suivants n'existent pas dans les contenus Geotribu précédents : {','.join(missing)}"
        logger.error(msg)
        if args.raise_exceptions:
            raise ValueError(msg)
    else:
        logger.info("Existence des tags ok")

    # check if tags are alphabetically sorted
    if not check_tags_order(tags):
        msg = "Les tags ne sont pas triés par ordre alphabétique"
        logger.error(msg)
        if args.raise_exceptions:
            raise ValueError(msg)
    else:
        logger.info("Ordre alphabétique des tags ok")

    # check that mandatory keys are present
    all_present, missing = check_mandatory_keys(yaml_meta.keys(), MANDATORY_KEYS)
    if not all_present:
        msg = f"Les clés suivantes ne sont pas présentes dans l'entête markdown : {','.join(missing)}"
        logger.error(msg)
        if args.raise_exceptions:
            raise ValueError(msg)
    else:
        logger.info("Clés de l'entête ok")
27 changes: 27 additions & 0 deletions geotribu_cli/content/json_feed.py
gounux marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Any

import requests
from requests import Response

from geotribu_cli.__about__ import __executable_name__, __version__

# HTTP headers passed to requests.get() by JsonFeedClient: ask for JSON and
# identify the client as "<executable name>/<version>". Header names and
# values are given as bytes.
HEADERS: dict = {
    b"Accept": b"application/json",
    b"User-Agent": bytes(f"{__executable_name__}/{__version__}", "utf8"),
}


class JsonFeedClient:
    """Minimal client reading the items and tags of a JSON feed over HTTP."""

    def __init__(self, url: str = "https://geotribu.fr/feed_json_created.json"):
        """Class initialization.

        Args:
            url (str): address of the JSON feed to read
        """
        self.url = url

    @property
    def get_items(self) -> list[dict[str, Any]]:
        """Items of the feed. The feed is downloaded on every access.

        Returns:
            list[dict[str, Any]]: value of the ``items`` key of the feed

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        # do not hang forever on an unresponsive server
        r: Response = requests.get(self.url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        return r.json()["items"]

    def get_tags(self, should_sort: bool = False) -> set[str]:
        """Collect the distinct tags of all the feed items.

        Args:
            should_sort (bool): if True, return the tags as a sorted list
                instead of a set. Defaults to False.

        Returns:
            set[str]: distinct tags; a sorted ``list[str]`` when
                ``should_sort`` is True
        """
        # "tags" is optional on a JSON Feed item: items without tags simply
        # contribute nothing instead of raising KeyError
        tags = set().union(*(item.get("tags") or [] for item in self.get_items))
        return sorted(tags) if should_sort else tags
1 change: 1 addition & 0 deletions geotribu_cli/subcommands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from geotribu_cli.comments import parser_comments_broadcast # noqa: F401
from geotribu_cli.comments import parser_comments_latest # noqa: F401
from geotribu_cli.comments import parser_comments_read # noqa: F401
from geotribu_cli.content.header_check import parser_header_check # noqa: F401
from geotribu_cli.content.new_article import parser_new_article # noqa: F401
from geotribu_cli.images.images_optimizer import parser_images_optimizer # noqa: F401
from geotribu_cli.rss.rss_reader import parser_latest_content # noqa: F401
Expand Down
20 changes: 20 additions & 0 deletions tests/fixtures/content/2012-12-21_article_passe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
title: Article supposément rédigé dans le passé
subtitle: Article supposément rédigé dans le passé pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2012-12-21
description: Article supposément rédigé dans le passé
icon: octicons/server-16
license: beerware
robots: index, follow
tags:
- Fromage
- OSM
- QGIS
---

# Article supposément rédigé dans le passé
19 changes: 19 additions & 0 deletions tests/fixtures/content/2044-04-01_article_futur.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
title: Article supposément rédigé dans le futur
subtitle: Article supposément rédigé dans le futur pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2044-04-01
icon: octicons/server-16
robots: index, follow
tags:
- Fromage
- IGN
- QGIS
- OSM
---

# Article supposément rédigé dans le futur
64 changes: 64 additions & 0 deletions tests/test_yaml_header_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import unittest
from unittest.mock import patch

import yaml

from geotribu_cli.content.header_check import (
check_existing_tags,
check_mandatory_keys,
check_tags_order,
)


class TestYamlHeaderCheck(unittest.TestCase):
    """Tests of the YAML header checks, based on two markdown fixtures."""

    @staticmethod
    def _read_front_matter(path):
        """Return the parsed YAML header of the markdown file at `path`."""
        with open(path) as md_file:
            text = md_file.read()
        # layout of a content: "---", YAML header, "---", markdown body
        _, header, _ = text.split("---", 2)
        return yaml.safe_load(header)

    def setUp(self):
        """Load the YAML header of both fixtures before each test."""
        self.past_yaml_meta = self._read_front_matter(
            "tests/fixtures/content/2012-12-21_article_passe.md"
        )
        self.future_yaml_meta = self._read_front_matter(
            "tests/fixtures/content/2044-04-01_article_futur.md"
        )

    @patch("geotribu_cli.content.header_check.get_existing_tags")
    def test_past_tags_existence(self, existing_tags_mock):
        """Tags absent from the (mocked) existing tags are reported missing."""
        existing_tags_mock.return_value = ["QGIS", "OSM"]
        result = check_existing_tags(self.past_yaml_meta["tags"])
        tags_ok, missing_tags, present_tags = result
        self.assertFalse(tags_ok)
        self.assertIn("Fromage", missing_tags)
        for known in ("QGIS", "OSM"):
            self.assertIn(known, present_tags)

    @patch("geotribu_cli.content.header_check.get_existing_tags")
    def test_future_tags_existence(self, existing_tags_mock):
        """Same check on the second fixture, with other existing tags."""
        existing_tags_mock.return_value = ["Fromage", "IGN"]
        result = check_existing_tags(self.future_yaml_meta["tags"])
        tags_ok, missing_tags, present_tags = result
        self.assertFalse(tags_ok)
        for unknown in ("QGIS", "OSM"):
            self.assertIn(unknown, missing_tags)
        for known in ("Fromage", "IGN"):
            self.assertIn(known, present_tags)

    def test_past_tags_order(self):
        """Tags of the first fixture are sorted."""
        past_tags = self.past_yaml_meta["tags"]
        self.assertTrue(check_tags_order(past_tags))

    def test_future_tags_order(self):
        """Tags of the second fixture are not sorted."""
        future_tags = self.future_yaml_meta["tags"]
        self.assertFalse(check_tags_order(future_tags))

    def test_past_mandatory_keys(self):
        """The first fixture holds every mandatory key."""
        header_keys = self.past_yaml_meta.keys()
        all_present, missing = check_mandatory_keys(header_keys)
        self.assertTrue(all_present)
        self.assertEqual(len(missing), 0)

    def test_future_mandatory_keys(self):
        """The second fixture lacks the license and description keys."""
        header_keys = self.future_yaml_meta.keys()
        all_present, missing = check_mandatory_keys(header_keys)
        self.assertFalse(all_present)
        self.assertEqual(len(missing), 2)
        for absent in ("license", "description"):
            self.assertIn(absent, missing)
Loading