From cb8b7e827f2751fb648aceb3cdc0bde8b6fb2005 Mon Sep 17 00:00:00 2001
From: benoit74 <benoit74@users.noreply.github.com>
Date: Fri, 22 Nov 2024 07:46:45 +0000
Subject: [PATCH 01/13] Significantly enhance the safety of metadata
 manipulation

---
 CHANGELOG.md                        |  18 +
 src/zimscraperlib/constants.py      |  29 --
 src/zimscraperlib/inputs.py         |   5 +
 src/zimscraperlib/zim/creator.py    | 324 +++++++---------
 src/zimscraperlib/zim/filesystem.py |  47 +--
 src/zimscraperlib/zim/metadata.py   | 558 ++++++++++++++++++++++------
 tests/inputs/test_inputs.py         |  12 +-
 tests/zim/conftest.py               |   9 +
 tests/zim/test_metadata.py          | 449 ++++++++++++++++++++--
 tests/zim/test_zim_creator.py       | 386 ++++++++++---------
 10 files changed, 1279 insertions(+), 558 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e287837e..7aefd01c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Renamed `filesystem.validate_zimfile_creatable` to `filesystem.file_creatable` to reflect general applicability to check file creation beyond ZIM files #200
 - Remove any "ZIM" reference in exceptions while working with files #200
+- Significantly enhance the safety of metadata manipulation (#205)
+  - add types for all metadata, one type per metadata name plus some generic ones for non-standard metadata
+    - all types are responsible to validate metadata value at initialization time
+    - validation checks for adherence to the ZIM specification and conventions are automated
+    - cleanup of unwanted control characters and stripping white characters are **automated in all text metadata**
+    - whenever possible, try to **automatically clean a "reasonably" bad metadata** (e.g. automaticall accept and remove duplicate tags - harmless - but not duplicate language codes - codes are supposed to be ordered, so it is a weird situation) ; this is an alignment of paradigm, because for some metadata the lib was permissive, while for other it was quite restrictive ; this PR tries to align this and **make the lib as permissive as possible**, avoiding to fail a scraper for something which could be automatically fixed
+    - it is now possible to disable ZIM conventions checks with `zim.metadata.check_metadata_conventions`
+  - simplify `zim.creator.Creator.config_metadata` by using these types and been more strict:
+    - add new `StandardMetadata` class for standard metadata, including list of mandatory one
+    - by default, all non-standard metadata must start with `X-` prefix
+      - this not yet an openZIM convention / specification, so it is possible to disable this check with `fail_on_missing_prefix` argument
+  - simplify `add_metadata`, use same metadata types
+  - simplify `zim.creator.Creator.start` with new types, and drop all metadata from memory after being passed to the libzim
+  - drop `zim.creator.convert_and_check_metadata` (not usefull anymore, simply use proper metadata type)
+  - move `MANDATORY_ZIM_METADATA_KEYS` and `DEFAULT_DEV_ZIM_METADATA` from `constants` to `zim.metadata` to avoid circular dependencies
+  - new `inputs.unique_values` utility function to compute the list of uniques values from a given list, but preserving initial list order
+  - in `__init__` of `zim.creator.Creator`, rename `disable_metadata_checks` to `check_metadata_conventions` for clarity and brevity
+    - beware that this manipulate the global `zim.metadata.check_metadata_conventions`, so if you have many creator running in parallel, they can't have different settings, last one initialized will "win"
 
 ### Added
 
diff --git a/src/zimscraperlib/constants.py b/src/zimscraperlib/constants.py
index 4e601121..c2a89ebd 100644
--- a/src/zimscraperlib/constants.py
+++ b/src/zimscraperlib/constants.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 # vim: ai ts=4 sts=4 et sw=4 nu
 
-import base64
 import pathlib
 import re
 
@@ -22,34 +21,6 @@
 # list of mimetypes we consider articles using it should default to FRONT_ARTICLE
 FRONT_ARTICLE_MIMETYPES = ["text/html"]
 
-# list of mandatory meta tags of the zim file.
-MANDATORY_ZIM_METADATA_KEYS = [
-    "Name",
-    "Title",
-    "Creator",
-    "Publisher",
-    "Date",
-    "Description",
-    "Language",
-    "Illustration_48x48@1",
-]
-
-DEFAULT_DEV_ZIM_METADATA = {
-    "Name": "Test Name",
-    "Title": "Test Title",
-    "Creator": "Test Creator",
-    "Publisher": "Test Publisher",
-    "Date": "2023-01-01",
-    "Description": "Test Description",
-    "Language": "fra",
-    # blank 48x48 transparent PNG
-    "Illustration_48x48_at_1": base64.b64decode(
-        "iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
-        "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
-        "SURBVBjTY2AYBdQEAAFQAAGn4toWAAAAAElFTkSuQmCC"
-    ),
-}
-
 RECOMMENDED_MAX_TITLE_LENGTH = 30
 MAXIMUM_DESCRIPTION_METADATA_LENGTH = 80
 MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH = 4000
diff --git a/src/zimscraperlib/inputs.py b/src/zimscraperlib/inputs.py
index 438a9fc0..23fee6b8 100644
--- a/src/zimscraperlib/inputs.py
+++ b/src/zimscraperlib/inputs.py
@@ -136,3 +136,8 @@ def compute_tags(
     return {
         tag.strip() for tag in list(default_tags) + (user_tags or "").split(";") if tag
     }
+
+
+def unique_values(items: list) -> list:
+    """Return unique values in input list while preserving list order"""
+    return list(dict.fromkeys(items))
diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
index a4558b8d..e0046c87 100644
--- a/src/zimscraperlib/zim/creator.py
+++ b/src/zimscraperlib/zim/creator.py
@@ -19,25 +19,20 @@
 
 from __future__ import annotations
 
-import datetime
 import io
 import logging
 import pathlib
 import re
 import weakref
-from collections.abc import Callable, Iterable
+from collections.abc import Callable
 from typing import Any
 
 import libzim.writer  # pyright: ignore
 import PIL.Image
-import regex  # pyright: ignore [reportMissingModuleSource]
 
+import zimscraperlib.zim.metadata
 from zimscraperlib import logger
-from zimscraperlib.constants import (
-    DEFAULT_DEV_ZIM_METADATA,
-    FRONT_ARTICLE_MIMETYPES,
-    MANDATORY_ZIM_METADATA_KEYS,
-)
+from zimscraperlib.constants import FRONT_ARTICLE_MIMETYPES
 from zimscraperlib.filesystem import (
     delete_callback,
     get_content_mimetype,
@@ -48,16 +43,12 @@
 from zimscraperlib.zim.indexing import IndexData
 from zimscraperlib.zim.items import StaticItem
 from zimscraperlib.zim.metadata import (
-    validate_counter,
-    validate_date,
-    validate_description,
-    validate_illustrations,
-    validate_language,
-    validate_longdescription,
-    validate_required_values,
-    validate_standard_str_types,
-    validate_tags,
-    validate_title,
+    DEFAULT_DEV_ZIM_METADATA,
+    MANDATORY_ZIM_METADATA_KEYS,
+    IllustrationMetadata,
+    LanguageMetadata,
+    Metadata,
+    StandardMetadataList,
 )
 
 DUPLICATE_EXC_STR = re.compile(
@@ -67,9 +58,6 @@
     re.MULTILINE | re.DOTALL,
 )
 
-# All control characters are disallowed in str metadata except \n, \r and \t
-UNWANTED_CONTROL_CHARACTERS_REGEX = regex.compile(r"(?![\n\t\r])\p{C}")
-
 
 def mimetype_for(
     path: str,
@@ -112,8 +100,8 @@ class Creator(libzim.writer.Creator):
     Meaning you should exit right after an exception in your code (during zim creation)
     Use workaround_nocancel=False to disable the workaround.
 
-    By default, all metadata are validated for compliance with openZIM guidelines and
-    conventions. Set disable_metadata_checks=True to disable this validation (you can
+    By default, all metadata are validated for compliance with openZIM specification and
+    conventions. Set check_metadata_conventions=True to disable this validation (you can
     still do checks manually with the validation methods or your own logic).
     """
 
@@ -125,10 +113,10 @@ def __init__(
         *,
         workaround_nocancel: bool | None = True,
         ignore_duplicates: bool | None = False,
-        disable_metadata_checks: bool = False,
+        check_metadata_conventions: bool = True,
     ):
         super().__init__(filename=filename)
-        self._metadata = {}
+        self._metadata: dict[str, Metadata] = {}
         self.__indexing_configured = False
         self.can_finish = True
 
@@ -143,7 +131,9 @@ def __init__(
 
         self.workaround_nocancel = workaround_nocancel
         self.ignore_duplicates = ignore_duplicates
-        self.disable_metadata_checks = disable_metadata_checks
+        zimscraperlib.zim.metadata.check_metadata_conventions = (
+            check_metadata_conventions
+        )
 
     def config_indexing(
         self, indexing: bool, language: str | None = None  # noqa: FBT001
@@ -151,7 +141,7 @@ def config_indexing(
         """Toggle full-text and title indexing of entries
 
         Uses Language metadata's value (or "") if not set"""
-        language = language or self._metadata.get("Language", "")
+        language = language or self._get_first_language_metadata_value() or ""
         if indexing and not is_valid_iso_639_3(language):
             raise ValueError("Not a valid ISO-639-3 language code")
         super().config_indexing(indexing, language)
@@ -159,217 +149,185 @@ def config_indexing(
         return self
 
     def _log_metadata(self):
-        """Log (DEBUG) all metadata set on (_metadata ~ config_metadata())
+        """Log in DEBUG level all metadata key and value"""
+        for name, metadata in sorted(self._metadata.items()):
+
+            if metadata is None:
+                logger.debug(f"Metadata: {name} is None")
+                continue
+
+            if not hasattr(metadata, "value"):
+                logger.debug(
+                    f"Metadata: {name} is improper metadata type: "
+                    f"{metadata.__class__.__qualname__}: {metadata!s}"
+                )
+                continue
 
-        Does not log metadata set post-start (via add_metadata())"""
-        for name, value in sorted(self._metadata.items()):
             # illustration mandates an Image
             if re.match(r"^Illustration_(\d+)x(\d+)@(\d+)$", name):
                 try:
-                    with PIL.Image.open(io.BytesIO(value)) as img:
+                    with PIL.Image.open(io.BytesIO(metadata.libzim_value)) as img:
                         logger.debug(
-                            f"Metadata: {name} is a {len(value)} bytes "
+                            f"Metadata: {name} is a {len(metadata.libzim_value)} bytes "
                             f"{img.size[0]}x{img.size[1]}px {img.format} Image"
                         )
-                except Exception:
-                    logger.debug(
-                        f"Metadata: {name} is a {len(value)} bytes "
-                        f"{get_content_mimetype(value[:64])} blob "
-                        "not recognized as an Image"
-                    )
-                continue
+                        continue
+                except Exception:  # noqa: S110 # nosec B110
+                    pass
 
             # bytes are either encoded string or arbitrary data
-            if isinstance(value, bytes):
-                mimetype = get_content_mimetype(value[:64])
+            if isinstance(metadata.value, bytes | io.BytesIO):
+                raw_value = (
+                    metadata.value
+                    if isinstance(metadata.value, bytes)
+                    else metadata.value.getvalue()
+                )
+                mimetype = get_content_mimetype(raw_value[:64])
                 if not mimetype.startswith("text/"):
                     logger.debug(
-                        f"Metadata: {name} is a {len(value)} bytes {mimetype} blob"
+                        f"Metadata: {name} is a {len(raw_value)} bytes {mimetype} blob"
                     )
                     continue
                 try:
-                    logger.debug(f"Metadata: {name} = {value.decode('UTF-8')}")
+                    logger.debug(f"Metadata: {name} = {raw_value.decode('UTF-8')}")
                 except Exception:
                     logger.debug(
-                        f"Metadata: {name} is a {len(value)} bytes {mimetype} blob "
+                        f"Metadata: {name} is a {len(raw_value)} bytes {mimetype} blob "
                         "not decodable as an UTF-8 string"
                     )
                 continue
 
             # rest is either printable or unexpected
             try:
-                logger.debug(f"Metadata: {name} = {value!s}")
+                logger.debug(f"Metadata: {name} = {metadata.value!s}")
             except Exception:
                 logger.debug(
-                    f"Metadata: {name} is unexpected data type: {type(value).__name__}"
+                    f"Metadata: {name} is unexpected data type: "
+                    f"{type(metadata.value).__qualname__}"
                 )
 
+    def _get_first_language_metadata_value(self) -> str | None:
+        """Private methods to get most popular lang code from Language metadata"""
+        for metadata in self._metadata.values():
+            if isinstance(metadata, LanguageMetadata):
+                return metadata.libzim_value.decode().split(",")[0]
+        return None
+
     def start(self):
+        """Start creator operation at libzim level
+
+        Includes creating metadata, including validating mandatory ones are set,
+        and configuring indexing.
+        """
         if logger.isEnabledFor(logging.DEBUG):  # pragma: no cover
             self._log_metadata()
 
         if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS):
-            raise ValueError("Mandatory metadata are not all set.")
-
-        if not self.disable_metadata_checks:
-            for name, value in self._metadata.items():
-                if value:
-                    self.validate_metadata(name, value)
+            missing_keys = [
+                key
+                for key in MANDATORY_ZIM_METADATA_KEYS
+                if not self._metadata.get(key)
+            ]
+            raise ValueError(
+                "Mandatory metadata are not all set. Missing metadata: "
+                f'{",".join(missing_keys)}. You should prefer to use '
+                "StandardMetadataList if possible."
+            )
 
-        language = self._metadata.get("Language", "").split(",")
-        if language[0] and not self.__indexing_configured:
-            self.config_indexing(True, language[0])
+        if (
+            language := self._get_first_language_metadata_value()
+        ) and not self.__indexing_configured:
+            self.config_indexing(True, language)
 
         super().__enter__()
 
-        self.add_illustration(48, self._metadata["Illustration_48x48@1"])
-        del self._metadata["Illustration_48x48@1"]
-        for name, value in self._metadata.items():
-            if value:
-                self.add_metadata(name, self.convert_and_check_metadata(name, value))
+        for metadata in self._metadata.values():
+            if isinstance(metadata, IllustrationMetadata):
+                self.add_illustration(metadata.size, metadata.libzim_value)
+            else:
+                self.add_metadata(metadata)
+        self._metadata.clear()
 
         return self
 
-    def validate_metadata(
+    def add_metadata(
         self,
-        name: str,
-        value: bytes | str,
+        value: Metadata,
+        mimetype: str = "text/plain;charset=UTF-8",
     ):
-        """Ensures metadata value for name is conform with the openZIM spec on Metadata
-
-        Also enforces recommendations
-        See https://wiki.openzim.org/wiki/Metadata"""
+        """Really add the metadata to the ZIM, after ZIM creation has started.
 
-        validate_required_values(name, value)
-        validate_standard_str_types(name, value)
+        You would probably prefer to use config_metadata methods to check metadata
+        before starting the ZIM, ensure all mandatory metadata are set, and avoid
+        duplicate metadata name.
+        """
 
-        validate_title(name, value)  # pyright: ignore
-        validate_date(name, value)  # pyright: ignore
-        validate_language(name, value)  # pyright: ignore
-        validate_counter(name, value)  # pyright: ignore
-        validate_description(name, value)  # pyright: ignore
-        validate_longdescription(name, value)  # pyright: ignore
-        validate_tags(name, value)  # pyright: ignore
-        validate_illustrations(name, value)  # pyright: ignore
+        super().add_metadata(value.name, value.libzim_value, mimetype)
 
-    def convert_and_check_metadata(
+    def config_metadata(
         self,
-        name: str,
-        value: str | bytes | datetime.date | datetime.datetime | Iterable[str],
-    ) -> str | bytes:
-        """Convert metadata to appropriate type for few known usecase and check type
+        std_metadata: StandardMetadataList | list[Metadata],
+        extra_metadata: list[Metadata] | None = None,
+        *,
+        fail_on_missing_prefix_in_extras: bool = True,
+    ):
+        """Checks and prepare list of ZIM metadata
 
-        Date: converts date and datetime to string YYYY-MM-DD
-        Tags: converts iterable to string with semi-colon separator
+        Checks ensure that metadata value can be converted to bytes, including all
+        requirements of the ZIM specifications and optionally openZIM conventions.
 
-        Also checks that final type is appropriate for libzim (str or bytes)
-        """
-        if name == "Date" and isinstance(value, datetime.date | datetime.datetime):
-            value = value.strftime("%Y-%m-%d")
-        if (
-            name == "Tags"
-            and not isinstance(value, str)
-            and not isinstance(value, bytes)
-            and isinstance(value, Iterable)
-        ):
-            value = ";".join(value)
-
-        if not isinstance(value, str) and not isinstance(value, bytes):
-            raise ValueError(f"Invalid type for {name}: {type(value)}")
+        Metadata are only kept in memory at this stage, not yet passed to libzim.
 
-        return value
+        They will be passed to libzim / writen to the ZIM on creator.start().
 
-    def add_metadata(
-        self,
-        name: str,
-        value: str | bytes,
-        mimetype: str = "text/plain;charset=UTF-8",
-    ):
-        # drop control characters before passing them to libzim
-        if isinstance(value, str):
-            value = UNWANTED_CONTROL_CHARACTERS_REGEX.sub("", value).strip(" \r\n\t")
-        if not self.disable_metadata_checks:
-            self.validate_metadata(name, value)
+        Arguments:
+            std_metadata: standard metadata defined in the ZIM specifications.
+                Prefer to use StandardMetadataList which ensure mandatory metadata are
+                all set.
+            extra_metadata: a list of extra metadata (not standard).
+            fail_on_missing_prefix_in_extras: disable the default check which force the
+                X- prefix on extra metadata name which is a convention to distinguish
+                these extra metadata
 
-        super().add_metadata(name, value, mimetype)
+        """
+        for fail_on_missing_prefix, metadata in [
+            (False, metadata)
+            for metadata in (
+                std_metadata.values()
+                if isinstance(std_metadata, StandardMetadataList)
+                else std_metadata
+            )
+        ] + [
+            (fail_on_missing_prefix_in_extras, metadata)
+            for metadata in extra_metadata or []
+        ]:
+            if fail_on_missing_prefix and not metadata.name.startswith("X-"):
+                raise ValueError(
+                    f"Metadata key {metadata.name} does not starts with X- as expected"
+                )
+            # if metadata.name in self._metadata:
+            #     raise ValueError(f"{metadata.name} cannot be defined twice")
+            self._metadata[metadata.name] = metadata
 
-    # there are many N803 problems, but they are intentional to match real tag name
-    def config_metadata(
-        self,
-        *,
-        Name: str,  # noqa: N803
-        Language: str,  # noqa: N803
-        Title: str,  # noqa: N803
-        Description: str,  # noqa: N803
-        LongDescription: str | None = None,  # noqa: N803
-        Creator: str,  # noqa: N803
-        Publisher: str,  # noqa: N803
-        Date: datetime.datetime | datetime.date | str,  # noqa: N803
-        Illustration_48x48_at_1: bytes,  # noqa: N803
-        Tags: Iterable[str] | str | None = None,  # noqa: N803
-        Scraper: str | None = None,  # noqa: N803
-        Flavour: str | None = None,  # noqa: N803
-        Source: str | None = None,  # noqa: N803
-        License: str | None = None,  # noqa: N803
-        Relation: str | None = None,  # noqa: N803
-        **extras: (
-            None
-            | float
-            | int
-            | bytes
-            | str
-            | datetime.datetime
-            | datetime.date
-            | Iterable[str]
-        ),
-    ):
-        """Sets all mandatory Metadata as well as standard and any other text ones"""
-        self._metadata.update(
-            {
-                "Name": Name,
-                "Title": Title,
-                "Creator": Creator,
-                "Publisher": Publisher,
-                "Date": Date,
-                "Description": Description,
-                "Language": Language,
-                "License": License,
-                "LongDescription": LongDescription,
-                "Tags": Tags,
-                "Relation": Relation,
-                "Flavour": Flavour,
-                "Source": Source,
-                "Scraper": Scraper,
-                "Illustration_48x48@1": Illustration_48x48_at_1,
-            }
-        )
-        self._metadata.update(extras)
-        for metadata_key, metadata_value in self._metadata.items():
-            # drop control characters so that proper value is stored in memory and
-            # logged in DEBUG mode ; also strip blank characters
-            if isinstance(metadata_value, str):
-                self._metadata[metadata_key] = UNWANTED_CONTROL_CHARACTERS_REGEX.sub(
-                    "", metadata_value
-                ).strip(" \r\n\t")
         return self
 
     def config_dev_metadata(
         self,
-        **extras: (
-            None
-            | int
-            | float
-            | bytes
-            | str
-            | datetime.datetime
-            | datetime.date
-            | Iterable[str]
-        ),
+        extra_metadata: Metadata | list[Metadata] | None = None,
     ):
-        """Calls config_metadata with default (yet overridable) values for dev"""
-        devel_default_metadata = DEFAULT_DEV_ZIM_METADATA.copy()
-        devel_default_metadata.update(extras)
-        return self.config_metadata(**devel_default_metadata)
+        """Calls minimal set of mandatory metadata with default values for dev
+
+        Extra metadata can be passed, and they are not checked for proper key prefix
+        """
+        return self.config_metadata(
+            std_metadata=DEFAULT_DEV_ZIM_METADATA,
+            extra_metadata=(
+                [extra_metadata]
+                if isinstance(extra_metadata, Metadata)
+                else extra_metadata
+            ),
+            fail_on_missing_prefix_in_extras=False,
+        )
 
     def add_item_for(
         self,
diff --git a/src/zimscraperlib/zim/filesystem.py b/src/zimscraperlib/zim/filesystem.py
index bdf0125a..6cbc73e3 100644
--- a/src/zimscraperlib/zim/filesystem.py
+++ b/src/zimscraperlib/zim/filesystem.py
@@ -38,6 +38,7 @@
 from zimscraperlib.filesystem import get_file_mimetype
 from zimscraperlib.html import find_title_in_file
 from zimscraperlib.types import get_mime_for_name
+from zimscraperlib.zim import metadata
 from zimscraperlib.zim.creator import Creator
 from zimscraperlib.zim.items import StaticItem
 
@@ -169,29 +170,31 @@ def make_zim_file(
         filename=fpath,
         main_path=main_page,
         ignore_duplicates=ignore_duplicates,
-        disable_metadata_checks=disable_metadata_checks,
+        check_metadata_conventions=disable_metadata_checks,
     ).config_metadata(
-        **{
-            k: v
-            for k, v in {
-                # (somewhat) mandatory
-                "Name": name,
-                "Title": title,
-                "Description": description,
-                "Date": date or datetime.date.today(),  # noqa: DTZ011
-                "Language": language,
-                "Creator": creator,
-                "Publisher": publisher,
-                # optional
-                "Tags": ";".join(tags) if tags else None,
-                "Source": source,
-                "Flavour": flavour,
-                "Scraper": scraper,
-                "LongDescription": long_description,
-                "Illustration_48x48_at_1": illustration_data,
-            }.items()
-            if v is not None
-        }
+        metadata.StandardMetadataList(
+            # mandatory
+            Name=metadata.NameMetadata(name),
+            Title=metadata.TitleMetadata(title),
+            Description=metadata.DescriptionMetadata(description),
+            Date=metadata.DateMetadata(date or datetime.date.today()),  # noqa: DTZ011
+            Language=metadata.LanguageMetadata(language),
+            Creator=metadata.CreatorMetadata(creator),
+            Publisher=metadata.PublisherMetadata(publisher),
+            Illustration_48x48_at_1=metadata.IllustrationMetadata(
+                "Illustration_48x48@1", illustration_data
+            ),
+            # optional
+            Tags=metadata.TagsMetadata(list(tags)) if tags else None,
+            Source=metadata.SourceMetadata(source) if source else None,
+            Flavour=metadata.FlavourMetadata(flavour) if flavour else None,
+            Scraper=metadata.ScraperMetadata(scraper) if scraper else None,
+            LongDescription=(
+                metadata.LongDescriptionMetadata(long_description)
+                if long_description
+                else None
+            ),
+        )
     )
 
     zim_file.start()
diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
index 411e42f6..0fe4f508 100644
--- a/src/zimscraperlib/zim/metadata.py
+++ b/src/zimscraperlib/zim/metadata.py
@@ -1,21 +1,34 @@
 from __future__ import annotations
 
+import base64
 import datetime
+import io
 import re
-from collections.abc import Iterable
-from typing import Any
+from dataclasses import dataclass
 
 import regex
 
 from zimscraperlib.constants import (
     ILLUSTRATIONS_METADATA_RE,
-    MANDATORY_ZIM_METADATA_KEYS,
     MAXIMUM_DESCRIPTION_METADATA_LENGTH,
     MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
     RECOMMENDED_MAX_TITLE_LENGTH,
 )
 from zimscraperlib.i18n import is_valid_iso_639_3
 from zimscraperlib.image.probing import is_valid_image
+from zimscraperlib.inputs import unique_values
+
+# All control characters are disallowed in str metadata except \n, \r and \t
+UNWANTED_CONTROL_CHARACTERS_REGEX = regex.compile(r"(?![\n\t\r])\p{C}")
+
+# flag to enable / disable check conventions (e.g. title shorter than 30 chars,
+# description shorter than 80 chars, ...)
+check_metadata_conventions: bool = True
+
+
+def clean_str(value: str) -> str:
+    """Clean a string value for unwanted control characters and strip white chars"""
+    return UNWANTED_CONTROL_CHARACTERS_REGEX.sub("", value).strip(" \r\n\t")
 
 
 def nb_grapheme_for(value: str) -> int:
@@ -23,123 +36,436 @@ def nb_grapheme_for(value: str) -> int:
     return len(regex.findall(r"\X", value))
 
 
-def validate_required_values(name: str, value: Any):
-    """ensures required ones have a value (spec doesnt requires it but makes sense)"""
-    if name in MANDATORY_ZIM_METADATA_KEYS and not value:
-        raise ValueError(f"Missing value for {name}")
-
-
-def validate_standard_str_types(
-    name: str,
-    value: str | bytes,
-):
-    """ensures standard string metadata are indeed str"""
-    if name in (
-        "Name",
-        "Title",
-        "Creator",
-        "Publisher",
-        "Description",
-        "LongDescription",
-        "License",
-        "Relation",
-        "Relation",
-        "Flavour",
-        "Source",
-        "Scraper",
-    ) and not isinstance(value, str):
-        raise ValueError(f"Invalid type for {name}: {type(value)}")
-
-
-def validate_title(name: str, value: str):
-    """ensures Title metadata is within recommended length"""
-    if name == "Title" and nb_grapheme_for(value) > RECOMMENDED_MAX_TITLE_LENGTH:
-        raise ValueError(f"{name} is too long.")
-
-
-def validate_date(name: str, value: datetime.datetime | datetime.date | str):
-    """ensures Date metadata can be casted to an ISO 8601 string"""
-    if name == "Date":
-        if not isinstance(value, datetime.datetime | datetime.date | str):
-            raise ValueError(f"Invalid type for {name}: {type(value)}")
-        elif isinstance(value, str):
-            match = re.match(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})", value)
-            if not match:
-                raise ValueError(f"Invalid {name} format, not matching regex")
-            try:
-                datetime.date(**{k: int(v) for k, v in match.groupdict().items()})
-            except Exception as exc:
-                raise ValueError(f"Invalid {name} format: {exc}") from None
-
-
-def validate_language(name: str, value: Iterable[str] | str):
-    """ensures Language metadata is a single or list of ISO-639-3 codes"""
-    if name == "Language":
-        if isinstance(value, str):
-            value = value.split(",")
-        for code in value:
-            if not is_valid_iso_639_3(code):
-                raise ValueError(f"{code} is not ISO-639-3.")
+class Metadata:
+    """A very basic metadata, with a name and a bytes or io.BytesIO value
+
+    Compliance with ZIM specification is done at initialisation, value passed at
+    initialization is kept in memory in `value` attribute, target name is stored in
+    `name` value and conversion to libzim value is available with `libzim_value`
+    property.
+    """
+
+    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
+        if not isinstance(value, bytes | io.BytesIO):
+            raise ValueError(
+                f"Unexpected type passed to {self.__class__.__qualname__}: "
+                f"{value.__class__.__qualname__}"
+            )
+        if name == "Counter":
+            raise ValueError("Counter cannot be set. libzim sets it.")
+        self.name = name
+        self.value = value
+        libzim_value = self.libzim_value  # to check for errors
+        if libzim_value is None or len(libzim_value) == 0:
+            raise ValueError("Cannot set empty metadata")
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        if isinstance(self.value, io.BytesIO):
+            return self.value.getvalue()
+        return self.value
 
 
-def validate_counter(name: str, _: str):
-    """ensures Counter metadata is not manually set"""
-    if name == "Counter":
-        raise ValueError(f"{name} cannot be set. libzim sets it.")
-
-
-def validate_description(name: str, value: str):
-    """ensures Description metadata is with required length"""
-    if (
-        name == "Description"
-        and nb_grapheme_for(value) > MAXIMUM_DESCRIPTION_METADATA_LENGTH
-    ):
-        raise ValueError(f"{name} is too long.")
-
-
-def validate_longdescription(name: str, value: str):
-    """ensures LongDescription metadata is with required length"""
-    if (
-        name == "LongDescription"
-        and nb_grapheme_for(value) > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
-    ):
-        raise ValueError(f"{name} is too long.")
-
-
-def validate_tags(name: str, value: Iterable[str] | str):
-    """ensures Tags metadata is either one or a list of strings"""
-    if name == "Tags" and (
-        not isinstance(value, Iterable)
-        or not all(isinstance(tag, str) for tag in value)
-    ):
-        raise ValueError(f"Invalid type(s) for {name}")
-    if (
-        name == "Tags"
-        and not isinstance(value, str)
-        and isinstance(value, Iterable)
-        and len(set(value)) != len(list(value))
-    ):
-        raise ValueError(f"Duplicate tags are not valid: {value}")
-    if name == "Tags" and isinstance(value, str):
-        values = value.split(";")
-        if len(set(values)) != len(list(values)):
-            raise ValueError(f"Duplicate tags are not valid: {value}")
-
-
-def validate_illustrations(name: str, value: bytes):
-    """ensures illustrations are PNG images or the advertised size"""
+class _TextMetadata(Metadata):
+    """A basic metadata whose value is expected to be some text"""
+
+    def __init__(self, name: str, value: bytes | io.BytesIO | str) -> None:
+        if not isinstance(value, bytes | io.BytesIO | str):
+            raise ValueError(
+                f"Unexpected type passed to {self.__class__.__qualname__}: "
+                f"{value.__class__.__qualname__}"
+            )
+        super().__init__(
+            name=name,
+            value=(
+                value.encode()
+                if isinstance(value, str)
+                else value.getvalue() if isinstance(value, io.BytesIO) else value
+            ),
+        )
+        self.value = value
+        _ = self.libzim_value  # to check for errors
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        # decode and reencode byte types to validate it's proper UTF-8 text
+        if isinstance(self.value, io.BytesIO):
+            str_value = self.value.getvalue().decode()
+        elif isinstance(self.value, bytes):
+            str_value = self.value.decode()
+        else:
+            str_value = self.value
+        return clean_str(str_value).encode()
+
+
+def _check_for_allowed_custom_metadata_name(name: str, class_name: str):
+    """Check that metadata name is not among the standard ones for which a type exist"""
+    if name in DEFAULT_DEV_ZIM_METADATA.__dict__.keys():
+        # this list contains the 'bad' illustration keys, but we don't care, they should
+        # still not be used
+        raise ValueError(
+            f"It is not allowed to use {class_name} for standard {name} "
+            f"metadata. Please use {name}Metadata type for proper checks"
+        )
     if name.startswith("Illustration_"):
-        match = ILLUSTRATIONS_METADATA_RE.match(name)
-        if match and not is_valid_image(
-            image=value,
-            imformat="PNG",
-            size=(
-                int(match.groupdict()["width"]),
-                int(match.groupdict()["height"]),
+        raise ValueError(
+            f"It is not allowed to use {class_name} for standard Illustration"
+            "metadata. Please use IllustrationMetadata type for proper checks"
+        )
+
+
+class CustomTextMetadata(_TextMetadata):
+    """A text metadata for which we do little checks"""
+
+    def __init__(self, name: str, value: bytes | io.BytesIO | str) -> None:
+        _check_for_allowed_custom_metadata_name(name, self.__class__.__qualname__)
+        super().__init__(name=name, value=value)
+
+
+class CustomMetadata(Metadata):
+    """A bytes metadata for which we do little checks"""
+
+    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
+        _check_for_allowed_custom_metadata_name(name, self.__class__.__qualname__)
+        super().__init__(name=name, value=value)
+
+
+class _MandatoryTextMetadata(_TextMetadata):
+    """A mandatory (value must be set) text metadata"""
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        value = super().libzim_value
+        if len(value) == 0:
+            raise ValueError("Missing value for mandatory metadata")
+        return value
+
+
+class _MandatoryMetadata(Metadata):
+    """A mandatory (value must be set) bytes metadata"""
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        value = super().libzim_value
+        if len(value) == 0:
+            raise ValueError("Missing value for mandatory metadata")
+        return value
+
+
+class TitleMetadata(_MandatoryTextMetadata):
+    """The Title metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__(name="Title", value=value)
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        value = super().libzim_value
+        if check_metadata_conventions:
+            if nb_grapheme_for(value.decode()) > RECOMMENDED_MAX_TITLE_LENGTH:
+                raise ValueError("Title is too long.")
+        return value
+
+
+class DescriptionMetadata(_MandatoryTextMetadata):
+    """The Description metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__(name="Description", value=value)
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        value = super().libzim_value
+        if check_metadata_conventions:
+            if nb_grapheme_for(value.decode()) > MAXIMUM_DESCRIPTION_METADATA_LENGTH:
+                raise ValueError("Description is too long.")
+        return value
+
+
+class LongDescriptionMetadata(_MandatoryTextMetadata):
+    """The LongDescription metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__(name="LongDescription", value=value)
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        value = super().libzim_value
+        if check_metadata_conventions:
+            if (
+                nb_grapheme_for(value.decode())
+                > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
+            ):
+                raise ValueError("LongDescription is too long.")
+        return value
+
+
+class DateMetadata(_TextMetadata):
+    """The Date metadata"""
+
+    def __init__(self, value: bytes | str | datetime.date | datetime.datetime) -> None:
+        if not isinstance(value, bytes | str | datetime.date | datetime.datetime):
+            raise ValueError(
+                f"Unexpected type passed to {self.__class__.__qualname__}: "
+                f"{value.__class__.__qualname__}"
+            )
+        super().__init__(
+            name="Date",
+            value=(
+                value
+                if isinstance(value, bytes)
+                else (
+                    value.encode()
+                    if isinstance(value, str)
+                    else value.strftime("%Y-%m-%d").encode()
+                )
             ),
+        )
+        self.value = value
+        _ = self.libzim_value  # to check for errors
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        if isinstance(self.value, datetime.date | datetime.datetime):
+            return self.value.strftime("%Y-%m-%d").encode()
+        match = re.match(
+            r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})",
+            self.value.decode() if isinstance(self.value, bytes) else self.value,
+        )
+        if not match:
+            raise ValueError("Invalid date format, not matching regex yyyy-mm-dd")
+        try:
+            datetime.date(**{k: int(v) for k, v in match.groupdict().items()})
+        except Exception as exc:
+            raise ValueError(f"Invalid date format: {exc}") from None
+        return self.value if isinstance(self.value, bytes) else self.value.encode()
+
+
+class IllustrationMetadata(_MandatoryMetadata):
+    """Any Illustration_**x**@* metadata"""
+
+    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
+        super().__init__(name=name, value=value)
+        _ = self.libzim_value  # to check for errors
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        match = ILLUSTRATIONS_METADATA_RE.match(self.name)
+        if not match:
+            raise ValueError("Illustration metadata has improper name")
+        self.size = int(match.groupdict()["height"])
+        if int(match.groupdict()["width"]) != self.size:
+            raise ValueError("Illustration is not squared")
+        if not is_valid_image(
+            image=self.value,
+            imformat="PNG",
+            size=(self.size, self.size),
         ):
             raise ValueError(
-                f"{name} is not a "
-                f"{match.groupdict()['width']}x{match.groupdict()['height']} "
-                "PNG Image"
+                f"{self.name} is not a valid {self.size}x{self.size} PNG Image"
             )
+        if isinstance(self.value, io.BytesIO):
+            return self.value.getvalue()
+        else:
+            return self.value
+
+
+class LanguageMetadata(_MandatoryTextMetadata):
+    """The Language metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str | list[str] | set[str]) -> None:
+        if not isinstance(value, bytes | io.BytesIO | str | list | set):
+            raise ValueError(
+                f"Unexpected type passed to {self.__class__.__qualname__}: "
+                f"{value.__class__.__qualname__}"
+            )
+        if isinstance(value, list | set) and not all(
+            isinstance(item, str) for item in value
+        ):
+            bad_types = {item.__class__.__qualname__ for item in value} - {"str"}
+            raise ValueError(
+                f"Invalid type(s) found in Iterable: {",".join(bad_types)}"
+            )
+        super().__init__(
+            name="Language",
+            value=",".join(value) if isinstance(value, list | set) else value,
+        )
+        self.value = value
+        _ = self.libzim_value  # to check for errors
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        if isinstance(self.value, bytes | io.BytesIO | str):
+            codes = [
+                clean_str(code) for code in super().libzim_value.decode().split(",")
+            ]
+        else:
+            codes = [clean_str(code) for code in self.value]
+        codes = [code for code in codes if code]  # remove empty values
+        if len(codes) == 0:
+            raise ValueError("Missing value for mandatory Language metadata")
+        if len(set(codes)) != len(codes):
+            raise ValueError("Duplicate codes not allowed in Language metadata")
+        for code in codes:
+            if not is_valid_iso_639_3(code):
+                raise ValueError(f"{code} is not ISO-639-3.")
+        return ",".join(codes).encode()
+
+
+class TagsMetadata(_TextMetadata):
+    """The Tags metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str | list[str] | set[str]) -> None:
+        if not isinstance(value, bytes | io.BytesIO | str | list | set):
+            raise ValueError(
+                f"Unexpected type passed to {self.__class__.__qualname__}: "
+                f"{value.__class__.__qualname__}"
+            )
+        if isinstance(value, list | set) and not all(
+            isinstance(item, str) for item in value
+        ):
+            bad_types = {item.__class__.__qualname__ for item in value} - {"str"}
+            raise ValueError(
+                f"Invalid type(s) found in Iterable: {",".join(bad_types)}"
+            )
+        super().__init__(
+            name="Tags",
+            value=";".join(value) if isinstance(value, list | set) else value,
+        )
+        self.value = value
+        _ = self.libzim_value  # to check for errors
+
+    @property
+    def libzim_value(self) -> bytes:
+        """The value to pass to the libzim for creating the metadata"""
+        if isinstance(self.value, bytes | io.BytesIO | str):
+            tags = unique_values(
+                [clean_str(tag) for tag in super().libzim_value.decode().split(";")]
+            )
+        else:
+            tags = unique_values([clean_str(tag) for tag in self.value])
+        tags = [tag for tag in tags if tag]  # remove empty values
+        return ";".join(tags).encode()
+
+
+class NameMetadata(_MandatoryTextMetadata):
+    """The Name metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Name", value)
+
+
+class CreatorMetadata(_MandatoryTextMetadata):
+    """The Creator metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Creator", value)
+
+
+class PublisherMetadata(_MandatoryTextMetadata):
+    """The Publisher metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Publisher", value)
+
+
+class ScraperMetadata(_TextMetadata):
+    """The Scraper metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Scraper", value)
+
+
+class FlavourMetadata(_TextMetadata):
+    """The Flavour metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Flavour", value)
+
+
+class SourceMetadata(_TextMetadata):
+    """The Source metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Source", value)
+
+
+class LicenseMetadata(_TextMetadata):
+    """The License metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("License", value)
+
+
+class RelationMetadata(_TextMetadata):
+    """The Relation metadata"""
+
+    def __init__(self, value: bytes | io.BytesIO | str) -> None:
+        super().__init__("Relation", value)
+
+
+@dataclass
+class StandardMetadataList:
+    """A class holding all openZIM standard metadata
+
+    Useful to ensure that all mandatory metadata are set, no typo occurs in metadata
+    name and the specification is respected (forbidden duplicate values, ...).
+    """
+
+    Name: NameMetadata
+    Language: LanguageMetadata
+    Title: TitleMetadata
+    Creator: CreatorMetadata
+    Publisher: PublisherMetadata
+    Date: DateMetadata
+    Illustration_48x48_at_1: IllustrationMetadata
+    Description: DescriptionMetadata
+    LongDescription: LongDescriptionMetadata | None = None
+    Tags: TagsMetadata | None = None
+    Scraper: ScraperMetadata | None = None
+    Flavour: FlavourMetadata | None = None
+    Source: SourceMetadata | None = None
+    License: LicenseMetadata | None = None
+    Relation: RelationMetadata | None = None
+
+    def values(self) -> list[Metadata]:
+        return [v for v in self.__dict__.values() if v]
+
+
+# A sample standard metadata list, to be used for dev purposes when one does not care
+# at all about metadata values ; this ensures all metadata are compliant with the spec
+DEFAULT_DEV_ZIM_METADATA = StandardMetadataList(
+    Name=NameMetadata("Test Name"),
+    Title=TitleMetadata("Test Title"),
+    Creator=CreatorMetadata("Test Creator"),
+    Publisher=PublisherMetadata("Test Publisher"),
+    Date=DateMetadata("2023-01-01"),
+    Description=DescriptionMetadata("Test Description"),
+    Language=LanguageMetadata("fra"),
+    # blank 48x48 transparent PNG
+    Illustration_48x48_at_1=IllustrationMetadata(
+        "Illustration_48x48@1",
+        base64.b64decode(
+            "iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
+            "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
+            "SURBVBjTY2AYBdQEAAFQAAGn4toWAAAAAElFTkSuQmCC"
+        ),
+    ),
+)
+
+# list of mandatory metadata of the zim file, automatically computed
+MANDATORY_ZIM_METADATA_KEYS = [
+    metadata.name
+    for metadata in DEFAULT_DEV_ZIM_METADATA.__dict__.values()
+    if isinstance(metadata, _MandatoryTextMetadata | _MandatoryMetadata)
+]
diff --git a/tests/inputs/test_inputs.py b/tests/inputs/test_inputs.py
index fd8a2cda..016b3371 100644
--- a/tests/inputs/test_inputs.py
+++ b/tests/inputs/test_inputs.py
@@ -9,18 +9,18 @@
 
 import zimscraperlib
 from zimscraperlib.constants import CONTACT
-from zimscraperlib.constants import (
-    MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
-)
-from zimscraperlib.constants import (
-    MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
-)
 from zimscraperlib.constants import NAME as PROJECT_NAME
 from zimscraperlib.inputs import (
     compute_descriptions,
     compute_tags,
     handle_user_provided_file,
 )
+from zimscraperlib.zim.metadata import (
+    MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
+)
+from zimscraperlib.zim.metadata import (
+    MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
+)
 
 
 def test_with_none():
diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py
index 1efb5129..eeaf4544 100644
--- a/tests/zim/conftest.py
+++ b/tests/zim/conftest.py
@@ -3,6 +3,8 @@
 
 import pytest
 
+import zimscraperlib.zim.metadata
+
 
 @pytest.fixture(scope="function")
 def html_str():
@@ -101,3 +103,10 @@ def counters():
         "application/xml": 1,
         "image/svg+xml": 5,
     }
+
+
+@pytest.fixture
+def ignore_metadata_conventions():
+    zimscraperlib.zim.metadata.check_metadata_conventions = False
+    yield
+    zimscraperlib.zim.metadata.check_metadata_conventions = True
diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py
index fce742ea..24cdc8e3 100644
--- a/tests/zim/test_metadata.py
+++ b/tests/zim/test_metadata.py
@@ -1,7 +1,12 @@
 from __future__ import annotations
 
+import base64
+import dataclasses
+import datetime
+import io
+import pathlib
 import re
-from collections.abc import Iterable
+from typing import NamedTuple
 
 import pytest
 
@@ -9,55 +14,437 @@
 
 
 @pytest.mark.parametrize(
-    "name, value",
+    "value",
     [
-        ("Language", "fra"),
-        ("Language", "fra,eng"),
-        ("Language", ["fra", "eng"]),
-        ("Other", "not_an_iso_639_3_code"),
+        ("fra"),
+        ("eng"),
+        ("bam"),
+        ("fra,eng"),
+        ("eng, fra"),
+        ("fra,eng,bam"),
+        (["fra", "eng"]),
+        ("fra, eng"),  # codes are automatically cleaned-up
+        ("eng,  \r"),  # codes are automatically cleaned-up
+        (["fra", "  "]),  # codes are automatically cleaned-up
     ],
 )
-def test_validate_language_valid(name: str, value: Iterable[str] | str):
-    metadata.validate_language(name, value)
+def test_validate_language_valid(value: list[str] | str):
+    assert metadata.LanguageMetadata(value)
 
 
 @pytest.mark.parametrize(
-    "name, value",
+    "value,error",
     [
-        ("Language", "fr"),
-        ("Language", "fra;eng"),
-        ("Language", "fra, eng"),
+        ("", "Missing value for mandatory metadata"),
+        ("fr", "is not ISO-639-3"),
+        ("en", "is not ISO-639-3"),
+        ("xxx", "is not ISO-639-3"),
+        ("rmr", "is not ISO-639-3"),
+        ("fra,en,bam", "is not ISO-639-3"),
+        ("fra;eng", "is not ISO-639-3"),
+        ("fra,fra", "Duplicate codes not allowed in Language metadata"),
+        (["fra", "fra"], "Duplicate codes not allowed in Language metadata"),
+        ([], "Missing value for mandatory metadata"),
+        (["  ", "\t"], "Missing value for mandatory Language metadata"),
+        ("  , ", "Missing value for mandatory Language metadata"),
+        (["fra", 1], "Invalid type(s) found in Iterable:"),
+        (["fra", b"1"], "Invalid type(s) found in Iterable:"),
+        (["fra", 1, b"1"], "Invalid type(s) found in Iterable:"),
     ],
 )
-def test_validate_language_invalid(name: str, value: Iterable[str] | str):
-    with pytest.raises(ValueError, match=re.escape("is not ISO-639-3")):
-        metadata.validate_language(name, value)
+def test_validate_language_invalid(value: list[str] | str, error: str):
+    with pytest.raises(ValueError, match=re.escape(error)):
+        assert metadata.LanguageMetadata(value)
 
 
 @pytest.mark.parametrize(
     "tags, is_valid",
     [
-        pytest.param("", True, id="empty_string"),
-        pytest.param("tag1", True, id="empty_string"),
+        pytest.param("wikipedia", True, id="simple_tag"),
         pytest.param("taaaag1", True, id="many_letters"),
-        pytest.param("tag1;tag2", True, id="semi_colon_distinct_1"),
-        pytest.param("tag2;tag2", False, id="semi_colon_identical"),
+        pytest.param("wikipedia;   ", True, id="remove_empty_values"),
+        pytest.param("wikipedia ;  football", True, id="semi_colon_distinct_1"),
+        pytest.param("football;football", True, id="semi_colon_identical"),
         pytest.param("tag,1;tug,1", True, id="semi_colon_distinct_2"),
         pytest.param(
-            "tag1,tag2", True, id="comma"
+            "wikipedia,football", True, id="comma"
         ),  # we cannot say that this ought to be a tags separator
-        pytest.param({"tag1"}, True, id="one_tag_in_set"),
-        pytest.param({"tag1", "tag2"}, True, id="two_tags_in_set"),
-        pytest.param(1, False, id="one_int"),
-        pytest.param(None, False, id="none_value"),
-        pytest.param(["tag1", "tag2"], True, id="two_distinct"),
-        pytest.param(["tag1", "tag1"], False, id="two_identical"),
-        pytest.param(["tag1", 1], False, id="int_in_list"),
+        pytest.param({"wikipedia"}, True, id="one_tag_in_set"),
+        pytest.param({"wikipedia", "football"}, True, id="two_tags_in_set"),
+        pytest.param(["wikipedia", "football"], True, id="two_distinct"),
+        pytest.param(["wikipedia", "wikipedia"], True, id="two_identical"),
     ],
 )
-def test_validate_tags(tags, is_valid):
+def test_validate_tags_valid(tags, is_valid):
     if is_valid:
-        metadata.validate_tags("Tags", tags)
+        metadata.TagsMetadata(tags)
     else:
-        with pytest.raises(ValueError):
-            metadata.validate_tags("Tags", tags)
+        with pytest.raises((ValueError, TypeError)):
+            metadata.TagsMetadata(tags)
+
+
+@pytest.mark.parametrize(
+    "value, error",
+    [
+        pytest.param("", "Cannot set empty metadata", id="empty_string"),
+        pytest.param(1, "Unexpected type passed to TagsMetadata: int", id="one_int"),
+        pytest.param(
+            None, "Unexpected type passed to TagsMetadata: NoneType", id="none_value"
+        ),
+        pytest.param(
+            ["wikipedia", 1], "Invalid type(s) found in Iterable: int", id="int_in_list"
+        ),
+    ],
+)
+def test_validate_tags_invalid(value: list[str] | str, error: str):
+    with pytest.raises(ValueError, match=re.escape(error)):
+        metadata.TagsMetadata(value)
+
+
+def test_validate_dedup_tags():
+    assert (
+        metadata.TagsMetadata("  wikipedia  \t   ;   wikipedia").libzim_value
+        == b"wikipedia"
+    )
+
+
+def test_validate_short_title_check_enabled():
+    assert metadata.TitleMetadata("T" * 30)
+
+
+def test_validate_short_grapheme_title_check_enabled():
+    assert metadata.TitleMetadata("में" * 30)
+
+
+def test_validate_too_long_title_check_enabled():
+    with pytest.raises(ValueError, match="Title is too long"):
+        assert metadata.TitleMetadata("T" * 31)
+
+
+def test_validate_too_long_title_check_disabled(
+    ignore_metadata_conventions,  # noqa: ARG001
+):
+    assert metadata.TitleMetadata("T" * 31)
+
+
+def test_validate_short_description_check_enabled():
+    assert metadata.DescriptionMetadata("T" * 80)
+
+
+def test_validate_short_grapheme_description_check_enabled():
+    assert metadata.DescriptionMetadata("में" * 80)
+
+
+def test_validate_too_long_description_check_enabled():
+    with pytest.raises(ValueError, match="Description is too long"):
+        assert metadata.DescriptionMetadata("T" * 81)
+
+
+def test_validate_too_long_description_check_disabled(
+    ignore_metadata_conventions,  # noqa: ARG001
+):
+    assert metadata.DescriptionMetadata("T" * 81)
+
+
+def test_validate_short_longdescription_check_enabled():
+    assert metadata.LongDescriptionMetadata("T" * 4000)
+
+
+def test_validate_short_grapheme_longdescription_check_enabled():
+    assert metadata.LongDescriptionMetadata("में" * 4000)
+
+
+def test_validate_too_long_longdescription_check_enabled():
+    with pytest.raises(ValueError, match="Description is too long"):
+        assert metadata.LongDescriptionMetadata("T" * 4001)
+
+
+def test_validate_too_long_longdescription_check_disabled(
+    ignore_metadata_conventions,  # noqa: ARG001
+):
+    assert metadata.LongDescriptionMetadata("T" * 4001)
+
+
+def test_validate_date_datetime_date():
+    assert metadata.DateMetadata(datetime.date(2024, 12, 11))
+
+
+def test_validate_date_datetime_datetime():
+    assert metadata.DateMetadata(
+        datetime.datetime(2024, 12, 11, 0, 0, 0, tzinfo=datetime.UTC)
+    )
+
+
+@pytest.mark.parametrize("value", [("9999-99-99"), ("2023/02/29"), ("1969-13-31")])
+def test_validate_date_invalid_date(value):
+    with pytest.raises(ValueError, match="Invalid date format"):
+        metadata.DateMetadata(value)
+
+
+def test_validate_illustration_invalid_name():
+    with pytest.raises(ValueError, match="Illustration metadata has improper name"):
+        metadata.IllustrationMetadata("Illustration_48x48_at_1", b"")
+
+
+def test_validate_illustration_not_squared():
+    with pytest.raises(ValueError, match="Illustration is not squared"):
+        metadata.IllustrationMetadata("Illustration_48x96@1", b"")
+
+
+def test_validate_illustration_wrong_sizes(png_image2):
+    with open(png_image2, "rb") as fh:
+        png_data = fh.read()
+    with pytest.raises(
+        ValueError, match="Illustration_48x48@1 is not a valid 48x48 PNG Image"
+    ):
+        metadata.IllustrationMetadata("Illustration_48x48@1", png_data)
+
+
+def test_blank_metadata():
+    with pytest.raises(ValueError, match="Cannot set empty metadata"):
+        metadata.Metadata("Blank", b"")
+
+
+class MetadataInitConfig(NamedTuple):
+    a_type: type
+    nb_args: int
+
+
+@pytest.fixture(
+    params=[
+        MetadataInitConfig(metadata.Metadata, 2),
+        MetadataInitConfig(metadata._TextMetadata, 2),
+        MetadataInitConfig(metadata.CustomTextMetadata, 2),
+        MetadataInitConfig(metadata.CustomMetadata, 2),
+        MetadataInitConfig(metadata._MandatoryTextMetadata, 2),
+        MetadataInitConfig(metadata._MandatoryMetadata, 2),
+        MetadataInitConfig(metadata.TitleMetadata, 1),
+        MetadataInitConfig(metadata.DescriptionMetadata, 1),
+        MetadataInitConfig(metadata.LongDescriptionMetadata, 1),
+        MetadataInitConfig(metadata.DateMetadata, 1),
+        MetadataInitConfig(metadata.IllustrationMetadata, 2),
+        MetadataInitConfig(metadata.LanguageMetadata, 1),
+        MetadataInitConfig(metadata.TagsMetadata, 1),
+        MetadataInitConfig(metadata.NameMetadata, 1),
+        MetadataInitConfig(metadata.CreatorMetadata, 1),
+        MetadataInitConfig(metadata.PublisherMetadata, 1),
+        MetadataInitConfig(metadata.ScraperMetadata, 1),
+        MetadataInitConfig(metadata.FlavourMetadata, 1),
+        MetadataInitConfig(metadata.SourceMetadata, 1),
+        MetadataInitConfig(metadata.LicenseMetadata, 1),
+        MetadataInitConfig(metadata.RelationMetadata, 1),
+    ]
+)
+def metadata_init(request: pytest.FixtureRequest) -> MetadataInitConfig:
+    return request.param
+
+
+def test_none_metadata_two_args(metadata_init: MetadataInitConfig):
+    with pytest.raises(
+        ValueError,
+        match=f"Unexpected type passed to {metadata_init.a_type.__qualname__}: "
+        "NoneType",
+    ):
+        if metadata_init.nb_args == 2:
+            metadata_init.a_type(
+                "Foo",
+                None,  # pyright: ignore[reportArgumentType]
+            )
+        elif metadata_init.nb_args == 1:
+            metadata_init.a_type(None)
+
+
+@pytest.mark.parametrize(
+    "a_type",
+    [
+        (metadata.Metadata),
+        (metadata.CustomTextMetadata),
+        (metadata._TextMetadata),
+        (metadata._MandatoryMetadata),
+    ],
+)
+def test_reserved_counter_metadata(a_type: type):
+    with pytest.raises(ValueError, match="Counter cannot be set. libzim sets it."):
+        a_type("Counter", b"Foo")
+
+
+@pytest.mark.parametrize(
+    "metadata_name",
+    [
+        ("Name"),
+        ("Language"),
+        ("Title"),
+        ("Creator"),
+        ("Publisher"),
+        ("Date"),
+        ("Illustration_48x48@1"),
+        ("Illustration_96x96@1"),
+        ("Description"),
+        ("LongDescription"),
+        ("Tags"),
+        ("Scraper"),
+        ("Flavour"),
+        ("Source"),
+        ("License"),
+        ("Relation"),
+    ],
+)
+@pytest.mark.parametrize(
+    "metadata_type",
+    [
+        (metadata.CustomMetadata),
+        (metadata.CustomTextMetadata),
+    ],
+)
+def test_reserved_names(metadata_name: str, metadata_type: type):
+    with pytest.raises(ValueError, match="It is not allowed to use"):
+        metadata_type(metadata_name, b"a value")
+
+
+def test_mandatory_value(metadata_init: MetadataInitConfig):
+
+    with pytest.raises(
+        ValueError,
+        match=(
+            "is not a valid 48x48 PNG Image"
+            if metadata_init.a_type == metadata.IllustrationMetadata
+            else (
+                "Invalid date format, not matching regex"
+                if metadata_init.a_type == metadata.DateMetadata
+                else "Cannot set empty metadata|Missing value for mandatory metadata"
+            )
+        ),
+    ):
+        if metadata_init.nb_args == 2:
+            metadata_init.a_type(
+                (
+                    "Foo"
+                    if metadata_init.a_type != metadata.IllustrationMetadata
+                    else "Illustration_48x48@1"
+                ),
+                b"",
+            )
+        elif metadata_init.nb_args == 1:
+            metadata_init.a_type(b"")
+
+
+def test_clean_value(metadata_init: MetadataInitConfig):
+    raw_value = b"\t\n\r\n \tA val \t\awith  \bcontrol chars\v\n"
+    clean_value = b"A val \twith  control chars"
+    if metadata_init.a_type in (
+        # some types do not support the raw value
+        metadata.IllustrationMetadata,
+        metadata.LanguageMetadata,
+        metadata.DateMetadata,
+        # and binary types are not cleaned-up
+        metadata.Metadata,
+        metadata.CustomMetadata,
+        metadata._MandatoryMetadata,
+    ):
+        pytest.skip("unsupported configuration")  # ignore these test cases
+    if metadata_init.nb_args == 2:
+        assert metadata_init.a_type(("Foo"), raw_value).libzim_value == clean_value
+    elif metadata_init.nb_args == 1:
+        assert metadata_init.a_type(raw_value).libzim_value == clean_value
+
+
+# @pytest.mark.parametrize(
+#     "metadata, expected_value",
+#     [
+#         (metadata.Metadata("Foo", b"a value"), b"a value"),
+#         (metadata.Metadata("Foo", io.BytesIO(b"a value")), b"a value"),
+#         (metadata.DescriptionMetadata(io.BytesIO(b"a value")), b"a value"),
+#         (metadata.DescriptionMetadata(b"a value"), b"a value"),
+#         (metadata.DescriptionMetadata("a value"), b"a value"),
+#         (metadata.IllustrationMetadata(png), b"a value"),
+#     ],
+# )
+def test_libzim_bytes_value(metadata_init: MetadataInitConfig, png_image: pathlib.Path):
+    if metadata_init.nb_args == 2:
+        if metadata_init.a_type == metadata.IllustrationMetadata:
+            with open(png_image, "rb") as fh:
+                png_data = fh.read()
+            assert (
+                metadata.IllustrationMetadata(
+                    "Illustration_48x48@1", png_data
+                ).libzim_value
+                == png_data
+            )
+        else:
+            assert metadata_init.a_type("Foo", b"a value").libzim_value == b"a value"
+    elif metadata_init.nb_args == 1:
+        if metadata_init.a_type == metadata.DateMetadata:
+            assert metadata_init.a_type(b"2023-12-11").libzim_value == b"2023-12-11"
+        elif metadata_init.a_type == metadata.LanguageMetadata:
+            assert metadata_init.a_type(b"fra,eng").libzim_value == b"fra,eng"
+        else:
+            assert metadata_init.a_type(b"a value").libzim_value == b"a value"
+
+
+def test_libzim_io_bytesio_value(
+    metadata_init: MetadataInitConfig, png_image: pathlib.Path
+):
+    if metadata_init.nb_args == 2:
+        if metadata_init.a_type == metadata.IllustrationMetadata:
+            with open(png_image, "rb") as fh:
+                png_data = fh.read()
+            assert (
+                metadata.IllustrationMetadata(
+                    "Illustration_48x48@1", io.BytesIO(png_data)
+                ).libzim_value
+                == png_data
+            )
+        else:
+            assert (
+                metadata_init.a_type("Foo", io.BytesIO(b"a value")).libzim_value
+                == b"a value"
+            )
+    elif metadata_init.nb_args == 1:
+        if metadata_init.a_type == metadata.DateMetadata:
+            pass  # Not supported
+        elif metadata_init.a_type == metadata.LanguageMetadata:
+            assert (
+                metadata_init.a_type(io.BytesIO(b"fra,eng")).libzim_value == b"fra,eng"
+            )
+        else:
+            assert (
+                metadata_init.a_type(io.BytesIO(b"a value")).libzim_value == b"a value"
+            )
+
+
+def test_std_metadata_values():
+    test_value = dataclasses.replace(metadata.DEFAULT_DEV_ZIM_METADATA)
+    test_value.License = metadata.LicenseMetadata("Creative Commons CC0")
+    values = test_value.values()
+    assert len(values) == 9
+
+    expected_values: list[metadata.Metadata] = [
+        metadata.LicenseMetadata("Creative Commons CC0"),
+        metadata.NameMetadata("Test Name"),
+        metadata.TitleMetadata("Test Title"),
+        metadata.CreatorMetadata("Test Creator"),
+        metadata.PublisherMetadata("Test Publisher"),
+        metadata.DateMetadata("2023-01-01"),
+        metadata.DescriptionMetadata("Test Description"),
+        metadata.LanguageMetadata("fra"),
+        # blank 48x48 transparent PNG
+        metadata.IllustrationMetadata(
+            "Illustration_48x48@1",
+            base64.b64decode(
+                "iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
+                "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
+                "SURBVBjTY2AYBdQEAAFQAAGn4toWAAAAAElFTkSuQmCC"
+            ),
+        ),
+    ]
+    for value in values:
+        found = False
+        for expected_value in expected_values:
+            if value.__class__ == expected_value.__class__:
+                assert value.value == expected_value.value
+                found = True
+                break
+        if not found:
+            pytest.fail(
+                f"Did not found matching expected values for {value.name} metadata"
+            )
diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py
index 17425f02..2fd7f3f2 100644
--- a/tests/zim/test_zim_creator.py
+++ b/tests/zim/test_zim_creator.py
@@ -3,8 +3,6 @@
 
 from __future__ import annotations
 
-import base64
-import datetime
 import io
 import logging
 import pathlib
@@ -14,19 +12,37 @@
 import sys
 import tempfile
 import time
+from typing import NamedTuple
 from unittest.mock import call, patch
 
 import pytest
 from libzim.writer import Compression  # pyright: ignore
 
-from zimscraperlib.constants import (
-    DEFAULT_DEV_ZIM_METADATA,
-    MANDATORY_ZIM_METADATA_KEYS,
-    UTF8,
-)
+from zimscraperlib.constants import UTF8
 from zimscraperlib.download import save_large_file, stream_file
 from zimscraperlib.filesystem import delete_callback
 from zimscraperlib.zim import Archive, Creator, StaticItem, URLItem
+from zimscraperlib.zim.metadata import (
+    DEFAULT_DEV_ZIM_METADATA,
+    CreatorMetadata,
+    CustomMetadata,
+    CustomTextMetadata,
+    DateMetadata,
+    DescriptionMetadata,
+    FlavourMetadata,
+    IllustrationMetadata,
+    LanguageMetadata,
+    LicenseMetadata,
+    LongDescriptionMetadata,
+    Metadata,
+    NameMetadata,
+    PublisherMetadata,
+    ScraperMetadata,
+    SourceMetadata,
+    StandardMetadataList,
+    TagsMetadata,
+    TitleMetadata,
+)
 from zimscraperlib.zim.providers import FileLikeProvider, URLProvider
 
 
@@ -60,7 +76,7 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn:
     with open(png_image, "rb") as fh:
         png_data = fh.read()
     with Creator(fpath, main_path).config_dev_metadata(
-        Tags=tags, Illustration_48x48_at_1=png_data
+        [TagsMetadata(tags), IllustrationMetadata("Illustration_48x48@1", png_data)]
     ) as creator:
         # verbatim HTML from string
         creator.add_item_for("welcome", "wel", content=html_str, is_front=True)
@@ -92,8 +108,10 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn:
     assert fpath.exists()
 
     reader = Archive(fpath)
-    assert reader.get_text_metadata("Title") == DEFAULT_DEV_ZIM_METADATA["Title"]
-    assert reader.get_text_metadata("Language") == DEFAULT_DEV_ZIM_METADATA["Language"]
+    assert reader.get_text_metadata("Title") == DEFAULT_DEV_ZIM_METADATA.Title.value
+    assert (
+        reader.get_text_metadata("Language") == DEFAULT_DEV_ZIM_METADATA.Language.value
+    )
     assert reader.get_text_metadata("Tags") == tags
     assert reader.main_entry.get_item().path == f"{main_path}"
     # make sure we have our image
@@ -134,7 +152,7 @@ def test_create_without_workaround(tmp_path):
 
 def test_noindexlanguage(tmp_path):
     fpath = tmp_path / "test.zim"
-    creator = Creator(fpath, "welcome").config_dev_metadata(Language="bam")
+    creator = Creator(fpath, "welcome").config_dev_metadata(LanguageMetadata("bam"))
     creator.config_indexing(False)
     with creator as creator:
         creator.add_item(StaticItem(path="welcome", content="hello"))
@@ -536,20 +554,6 @@ def test_without_metadata(tmp_path):
         Creator(tmp_path, "").start()
 
 
-def test_check_metadata(tmp_path):
-    with pytest.raises(ValueError, match="Counter cannot be set"):
-        Creator(tmp_path, "").config_dev_metadata(Counter=1).start()
-
-    with pytest.raises(ValueError, match="Invalid type for Foo"):
-        Creator(tmp_path, "").config_dev_metadata(Foo=1).start()
-
-    with pytest.raises(ValueError, match="Description is too long."):
-        Creator(tmp_path, "").config_dev_metadata(Description="T" * 90).start()
-
-    with pytest.raises(ValueError, match="LongDescription is too long."):
-        Creator(tmp_path, "").config_dev_metadata(LongDescription="T" * 5000).start()
-
-
 @pytest.mark.parametrize(
     "tags",
     [
@@ -575,42 +579,62 @@ def test_start_logs_metadata_log_contents(mocked_logger, png_image, tags, tmp_pa
     fpath = tmp_path / "test_config.zim"
     with open(png_image, "rb") as fh:
         png_data = fh.read()
-    creator = Creator(fpath, "", disable_metadata_checks=True).config_metadata(
-        Name="wikipedia_fr_football",
-        Title="English Wikipedia",
-        Creator="English speaking Wikipedia contributors",
-        Publisher="Wikipedia user Foobar",
-        Date="2009-11-21",
-        Description="All articles (without images) from the english Wikipedia",
-        LongDescription="This ZIM file contains all articles (without images)"
-        " from the english Wikipedia by 2009-11-10. The topics are...",
-        Language="eng",
-        License="CC-BY",
-        Tags=tags,
-        Flavour="nopic",
-        Source="https://en.wikipedia.org/",
-        Scraper="mwoffliner 1.2.3",
-        Illustration_48x48_at_1=png_data,
-        TestMetadata="Test Metadata",
+    creator = Creator(fpath, "", check_metadata_conventions=False).config_metadata(
+        StandardMetadataList(
+            Name=NameMetadata("wikipedia_fr_football"),
+            Title=TitleMetadata("English Wikipedia"),
+            Creator=CreatorMetadata("English speaking Wikipedia contributors"),
+            Publisher=PublisherMetadata("Wikipedia user Foobar"),
+            Date=DateMetadata("2009-11-21"),
+            Description=DescriptionMetadata(
+                "All articles (without images) from the english Wikipedia"
+            ),
+            LongDescription=LongDescriptionMetadata(
+                "This ZIM file contains all articles (without images)"
+                " from the english Wikipedia by 2009-11-10. The topics are..."
+            ),
+            Language=LanguageMetadata("eng"),
+            License=LicenseMetadata("CC-BY"),
+            Tags=TagsMetadata(tags),
+            Flavour=FlavourMetadata("nopic"),
+            Source=SourceMetadata("https://en.wikipedia.org/"),
+            Scraper=ScraperMetadata("mwoffliner 1.2.3"),
+            Illustration_48x48_at_1=IllustrationMetadata(
+                "Illustration_48x48@1", png_data
+            ),
+        ),
+        [CustomTextMetadata("TestMetadata", "Test Metadata")],
+        fail_on_missing_prefix_in_extras=False,
     )
 
-    class NotPrintable:
+    class NotPrintable(str):
         def __str__(self):
             raise ValueError("Not printable I said")
 
     creator._metadata.update(
         {
-            "Illustration_96x96@1": b"%PDF-1.5\n%\xe2\xe3\xcf\xd3",
-            "Chars": b"\xc5\xa1\xc9\x94\xc9\x9b",
-            "Chars-32": b"\xff\xfe\x00\x00a\x01\x00\x00T\x02\x00\x00[\x02\x00\x00",
-            "Video": b"\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00",
-            "Toupie": NotPrintable(),
-        }
+            "Illustration_96x96@1": Metadata(
+                "Illustration_96x96@1", b"%PDF-1.5\n%\xe2\xe3\xcf\xd3"
+            ),
+            "Chars": Metadata("Chars", b"\xc5\xa1\xc9\x94\xc9\x9b"),
+            "Chars-32": Metadata(
+                "Chars-32", b"\xff\xfe\x00\x00a\x01\x00\x00T\x02\x00\x00[\x02\x00\x00"
+            ),
+            "Video": Metadata(
+                "Video", b"\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00"
+            ),
+            "Toupie": CustomTextMetadata("Toupie", NotPrintable("value")),
+        }  # intentionaly bad, to handle case where user does bad things
     )
+    # intentionaly bad, to handle case where user does bad things
+    creator._metadata["Relation"] = None  # pyright: ignore[reportArgumentType]
+    creator._metadata["BadRawValue"] = "Value"  # pyright: ignore[reportArgumentType]
+
     creator._log_metadata()
     # /!\ this must be alpha sorted
     mocked_logger.debug.assert_has_calls(
         [
+            call("Metadata: BadRawValue is improper metadata type: str: Value"),
             call("Metadata: Chars = šɔɛ"),
             call(
                 "Metadata: Chars-32 is a 16 bytes text/plain blob "
@@ -624,10 +648,7 @@ def __str__(self):
             ),
             call("Metadata: Flavour = nopic"),
             call("Metadata: Illustration_48x48@1 is a 3274 bytes 48x48px PNG Image"),
-            call(
-                "Metadata: Illustration_96x96@1 is a 14 bytes "
-                "application/pdf blob not recognized as an Image"
-            ),
+            call("Metadata: Illustration_96x96@1 is a 14 bytes application/pdf blob"),
             call("Metadata: Language = eng"),
             call("Metadata: License = CC-BY"),
             call(
@@ -637,21 +658,27 @@ def __str__(self):
             ),
             call("Metadata: Name = wikipedia_fr_football"),
             call("Metadata: Publisher = Wikipedia user Foobar"),
-            call("Metadata: Relation = None"),
+            call("Metadata: Relation is None"),
             call("Metadata: Scraper = mwoffliner 1.2.3"),
             call("Metadata: Source = https://en.wikipedia.org/"),
             call(f"Metadata: Tags = {tags}"),
             call("Metadata: TestMetadata = Test Metadata"),
             call("Metadata: Title = English Wikipedia"),
-            call("Metadata: Toupie is unexpected data type: NotPrintable"),
+            call(
+                "Metadata: Toupie is unexpected data type: "
+                "test_start_logs_metadata_log_contents.<locals>.NotPrintable"
+            ),
             call("Metadata: Video is a 33 bytes video/mp4 blob"),
         ]
     )
 
 
-def test_relax_metadata(tmp_path):
-    Creator(tmp_path, "", disable_metadata_checks=True).config_dev_metadata(
-        Description="T" * 90
+def test_relax_metadata(
+    tmp_path,
+    ignore_metadata_conventions,  # noqa: ARG001
+):
+    Creator(tmp_path, "", check_metadata_conventions=False).config_dev_metadata(
+        DescriptionMetadata("T" * 90)
     ).start()
 
 
@@ -679,22 +706,30 @@ def test_config_metadata(tmp_path, png_image, tags):
     with open(png_image, "rb") as fh:
         png_data = fh.read()
     creator = Creator(fpath, "").config_metadata(
-        Name="wikipedia_fr_football",
-        Title="English Wikipedia",
-        Creator="English speaking Wikipedia contributors",
-        Publisher="Wikipedia user Foobar",
-        Date="2009-11-21",
-        Description="All articles (without images) from the english Wikipedia",
-        LongDescription="This ZIM file contains all articles (without images)"
-        " from the english Wikipedia by 2009-11-10. The topics are...",
-        Language="eng",
-        License="CC-BY",
-        Tags=tags,
-        Flavour="nopic",
-        Source="https://en.wikipedia.org/",
-        Scraper="mwoffliner 1.2.3",
-        Illustration_48x48_at_1=png_data,
-        TestMetadata="Test Metadata",
+        StandardMetadataList(
+            Name=NameMetadata("wikipedia_fr_football"),
+            Title=TitleMetadata("English Wikipedia"),
+            Creator=CreatorMetadata("English speaking Wikipedia contributors"),
+            Publisher=PublisherMetadata("Wikipedia user Foobar"),
+            Date=DateMetadata("2009-11-21"),
+            Description=DescriptionMetadata(
+                "All articles (without images) from the english Wikipedia"
+            ),
+            LongDescription=LongDescriptionMetadata(
+                "This ZIM file contains all articles (without images)"
+                " from the english Wikipedia by 2009-11-10. The topics are..."
+            ),
+            Language=LanguageMetadata("eng"),
+            License=LicenseMetadata("CC-BY"),
+            Tags=TagsMetadata(tags),
+            Flavour=FlavourMetadata("nopic"),
+            Source=SourceMetadata("https://en.wikipedia.org/"),
+            Scraper=ScraperMetadata("mwoffliner 1.2.3"),
+            Illustration_48x48_at_1=IllustrationMetadata(
+                "Illustration_48x48@1", png_data
+            ),
+        ),
+        [CustomTextMetadata("X-TestMetadata", "Test Metadata")],
     )
     with creator:
         pass
@@ -720,48 +755,46 @@ def test_config_metadata(tmp_path, png_image, tags):
     )
     assert reader.get_text_metadata("Language") == "eng"
     assert reader.get_text_metadata("License") == "CC-BY"
-    assert (
-        reader.get_text_metadata("Tags")
-        == "wikipedia;_category:wikipedia;_pictures:no;_videos:no;"
-        "_details:yes;_ftindex:yes"
-    )
+    assert set(reader.get_text_metadata("Tags").split(";")) == set(
+        "wikipedia;_category:wikipedia;_pictures:no;_videos:no;"
+        "_details:yes;_ftindex:yes".split(";")
+    )  # order of tags is not guaranteed and does not matter
+    assert len(reader.get_text_metadata("Tags").split(";")) == 6
     assert reader.get_text_metadata("Flavour") == "nopic"
     assert reader.get_text_metadata("Source") == "https://en.wikipedia.org/"
     assert reader.get_text_metadata("Scraper") == "mwoffliner 1.2.3"
     assert reader.get_metadata("Illustration_48x48@1") == png_data
-    assert reader.get_text_metadata("TestMetadata") == "Test Metadata"
+    assert reader.get_text_metadata("X-TestMetadata") == "Test Metadata"
 
 
 def test_config_metadata_control_characters(tmp_path):
     fpath = tmp_path / "test_config.zim"
     creator = Creator(fpath, "").config_dev_metadata(
-        Description="\t\n\r\n \tA description \awith  \bcontrol characters\v",
-        LongDescription="A description \rwith \a\ncontrol characters\tsss\t\n\r\n \t",
-        Creator="  A creator ",
-    )
-    assert creator._metadata["Description"] == "A description with  control characters"
-    assert (
-        creator._metadata["LongDescription"]
-        == "A description \rwith \ncontrol characters\tsss"
+        [
+            DescriptionMetadata(
+                "\t\n\r\n \tA description \awith  \bcontrol characters\v"
+            ),
+            LongDescriptionMetadata(
+                "A description \rwith \a\ncontrol characters\tsss\t\n\r\n \t"
+            ),
+            CreatorMetadata("  A creator "),
+        ]
     )
-    assert creator._metadata["Creator"] == "A creator"
     with creator:
         creator.add_metadata(
-            "Description_1",
-            "\t\n\r\n \tA description \awith  \bcontrol characters\v",
-        )
-        creator.add_metadata(
-            "LongDescription_1",
-            "A description \rwith \a\ncontrol characters\tsss\t\n\r\n \t",
-        )
-        creator.add_metadata(
-            "Creator_1",
-            "  A creator ",
+            CustomTextMetadata(
+                "Description_1",
+                "\t\n\r\n \tA description \awith  \bcontrol characters\v",
+            )
         )
         creator.add_metadata(
-            "Binary1",
-            bytes.fromhex("01FA"),
+            CustomTextMetadata(
+                "LongDescription_1",
+                "A description \rwith \a\ncontrol characters\tsss\t\n\r\n \t",
+            )
         )
+        creator.add_metadata(CustomTextMetadata("Creator_1", "  A creator "))
+        creator.add_metadata(CustomMetadata("Binary1", bytes.fromhex("01FA")))
         pass
 
     assert fpath.exists()
@@ -788,83 +821,94 @@ def test_config_metadata_control_characters(tmp_path):
     assert bytes.hex(reader.get_metadata("Binary1")) == "01fa"
 
 
-@pytest.mark.parametrize(
-    "name,value,valid",
-    [
-        ("Name", 4, False),
-        ("Title", 4, False),
-        ("Creator", 4, False),
-        ("Publisher", 4, False),
-        ("Description", 4, False),
-        ("LongDescription", 4, False),
-        ("License", 4, False),
-        ("Relation", 4, False),
-        ("Relation", 4, False),
-        ("Flavour", 4, False),
-        ("Source", 4, False),
-        ("Scraper", 4, False),
-        ("Title", "में" * 30, True),
-        ("Title", "X" * 31, False),
-        ("Date", 4, False),
-        ("Date", datetime.datetime.now(), True),  # noqa: DTZ005
-        ("Date", datetime.datetime(1969, 12, 31, 23, 59), True),  # noqa: DTZ001
-        ("Date", datetime.date(1969, 12, 31), True),
-        ("Date", datetime.date.today(), True),  # noqa: DTZ011
-        ("Date", "1969-12-31", True),
-        ("Date", "1969-13-31", False),
-        ("Date", "2023/02/29", False),
-        ("Date", "2023-55-99", False),
-        ("Language", "xxx", False),
-        ("Language", "rmr", False),
-        ("Language", "eng", True),
-        ("Language", "fra", True),
-        ("Language", "bam", True),
-        ("Language", "fr", False),
-        ("Language", "en", False),
-        ("Language", "fra,eng", True),
-        ("Language", "fra,eng,bam", True),
-        ("Language", "fra,en,bam", False),
-        ("Language", "eng,", False),
-        ("Language", "eng, fra", False),
-        ("Counter", "1", False),
-        ("Description", "में" * 80, True),
-        ("Description", "X" * 81, False),
-        ("LongDescription", "में" * 4000, True),
-        ("LongDescription", "X" * 4001, False),
-        ("Tags", 4, False),
-        ("Tags", ["wikipedia", 4, "football"], False),
-        ("Tags", ("wikipedia", "football"), True),
-        ("Tags", ["wikipedia", "football"], True),
-        ("Tags", "wikipedia;football", True),
-        # 1x1 PNG image
-        (
-            "Illustration_48x48@1",
-            base64.b64decode(
-                "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAGXRFWHRTb2Z0d2FyZQBB"
-                "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAA9JREFUeNpi+P//P0CAAQAF/gL+Lc6J7gAAAABJ"
-                "RU5ErkJggg=="
-            ),
+class ExtraMetadataCase(NamedTuple):
+    extras: list[Metadata]
+    fail_on_missing_prefix: bool
+    id: str
+
+
+def __get_extra_metadata_case_id(case: ExtraMetadataCase) -> str:
+    return case.id
+
+
+@pytest.fixture(
+    params=[
+        ExtraMetadataCase(
+            [CustomTextMetadata("X-TestMetadata", "Test Metadata")],
+            True,
+            id="good_prefix",
+        ),
+        ExtraMetadataCase(
+            [CustomTextMetadata("TestMetadata", "Test Metadata")],
             False,
+            id="bad_prefix",
         ),
-        (
-            "Illustration_48x48@1",
-            DEFAULT_DEV_ZIM_METADATA["Illustration_48x48_at_1"],
+        ExtraMetadataCase(
+            [
+                CustomTextMetadata("X-TestMetadata", "Test Metadata"),
+                CustomTextMetadata("X-TestMetadata2", "Test Metadata"),
+            ],
             True,
+            id="list_of_two_good_prefix",
         ),
-        (
-            "Illustration_96x96@1",
-            DEFAULT_DEV_ZIM_METADATA["Illustration_48x48_at_1"],
+        ExtraMetadataCase(
+            [
+                CustomTextMetadata("X-TestMetadata", "Test Metadata"),
+                CustomTextMetadata("TestMetadata2", "Test Metadata"),
+            ],
             False,
+            id="list_with_one_bad_prefix",
         ),
-    ]
-    + [(name, "", False) for name in MANDATORY_ZIM_METADATA_KEYS],
+    ],
+    ids=__get_extra_metadata_case_id,
 )
-def test_validate_metadata(tmp_path, name, value, valid):
-    if valid:
-        Creator(tmp_path / "_.zim", "").validate_metadata(name, value)
-    else:
-        with pytest.raises(ValueError):
-            Creator(tmp_path / "_.zim", "").validate_metadata(name, value)
+def metadata_extras(request: pytest.FixtureRequest):
+    yield request.param
+
+
+def test_metadata_extras(tmp_path, metadata_extras: ExtraMetadataCase):
+    Creator(tmp_path / "_.zim", "").config_metadata(
+        DEFAULT_DEV_ZIM_METADATA,
+        metadata_extras.extras,
+        fail_on_missing_prefix_in_extras=metadata_extras.fail_on_missing_prefix,
+    )
+
+
+def test_metadata_extras_dev(tmp_path, metadata_extras: ExtraMetadataCase):
+    Creator(tmp_path / "_.zim", "").config_dev_metadata(metadata_extras.extras)
+
+
+def test_metadata_extras_missing_prefix(tmp_path):
+    with pytest.raises(ValueError, match="does not starts with X- as expected"):
+        Creator(tmp_path / "_.zim", "").config_metadata(
+            DEFAULT_DEV_ZIM_METADATA,
+            [CustomTextMetadata("TestMetadata", "Test Metadata")],
+        )
+
+
+@pytest.mark.parametrize(
+    "name,metadata,expected_value",
+    [
+        pytest.param(
+            "X-Test",
+            CustomTextMetadata(
+                "X-Test", DEFAULT_DEV_ZIM_METADATA.Title.libzim_value.decode() + "Foo"
+            ),
+            DEFAULT_DEV_ZIM_METADATA.Title.libzim_value.decode() + "Foo",
+            id="simple_str",
+        ),
+        pytest.param("Tags", TagsMetadata(["tag1", "tag2"]), "tag1;tag2", id="tags"),
+    ],
+)
+def test_add_metadata(
+    tmp_path: pathlib.Path, name: str, metadata: Metadata, expected_value: str
+):
+    fpath = tmp_path / "test_blank.zim"
+    with Creator(fpath, "").config_dev_metadata() as creator:
+        creator.add_metadata(metadata)
+    assert fpath.exists()
+    reader = Archive(fpath)
+    assert reader.get_text_metadata(name) == expected_value
 
 
 def test_config_indexing(tmp_path):

From 968a62c6d97c623113ac406d511260e581525957 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Tue, 10 Dec 2024 16:34:24 +0000
Subject: [PATCH 02/13] Added beartype to `zim` submodule

---
 pyproject.toml                    | 3 +++
 src/zimscraperlib/zim/__init__.py | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 26ace799..a5776507 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
   "regex>=2020.7.14",
   "pymupdf>=1.24.0,<2.0",
   "CairoSVG>=2.2.0,<3.0",
+  "beartype==0.19.0",
   # youtube-dl should be updated as frequently as possible
   "yt-dlp"
 ]
@@ -252,6 +253,8 @@ ban-relative-imports = "all"
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
 # _libkiwix mimics libkiwix C++ code, names obey C++ conventions
 "src/zimscraperlib/zim/_libkiwix.py" = ["N802", "N803", "N806"]
+# beartype must be first
+"src/zimscraperlib/zim/__init__.py" = ["E402"]
 
 [tool.pytest.ini_options]
 minversion = "7.3"
diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py
index df282f3a..6b0b1714 100644
--- a/src/zimscraperlib/zim/__init__.py
+++ b/src/zimscraperlib/zim/__init__.py
@@ -9,8 +9,11 @@
     zim.items: item to add to creator
     zim.archive: read ZIM files, accessing or searching its content"""
 
+from beartype.claw import beartype_this_package
 from libzim.writer import Blob  # pyright: ignore
 
+beartype_this_package()
+
 from zimscraperlib.zim.archive import Archive
 from zimscraperlib.zim.creator import Creator
 from zimscraperlib.zim.filesystem import make_zim_file

From f633e7a4cab6f8dc788b5caea9f12142b281ac01 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 09:56:08 +0000
Subject: [PATCH 03/13] Fixing CounterMap type

---
 src/zimscraperlib/zim/_libkiwix.py | 13 ++++++++-----
 src/zimscraperlib/zim/archive.py   |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/zimscraperlib/zim/_libkiwix.py b/src/zimscraperlib/zim/_libkiwix.py
index 02cae889..5e24e159 100644
--- a/src/zimscraperlib/zim/_libkiwix.py
+++ b/src/zimscraperlib/zim/_libkiwix.py
@@ -15,12 +15,15 @@
 from __future__ import annotations
 
 import io
-from collections import namedtuple
+from typing import NamedTuple
 
-MimetypeAndCounter = namedtuple("MimetypeAndCounter", ["mimetype", "value"])
-CounterMap = dict[
-    type(MimetypeAndCounter.mimetype), type(MimetypeAndCounter.value)  # pyright: ignore
-]
+
+class MimetypeAndCounter(NamedTuple):
+    mimetype: str
+    value: int
+
+
+type CounterMap = dict[str, int]
 
 
 def getline(src: io.StringIO, delim: str | None = None) -> tuple[bool, str]:
diff --git a/src/zimscraperlib/zim/archive.py b/src/zimscraperlib/zim/archive.py
index 19314687..891fb976 100644
--- a/src/zimscraperlib/zim/archive.py
+++ b/src/zimscraperlib/zim/archive.py
@@ -17,7 +17,7 @@
 import libzim.search  # Query, Searcher  # pyright: ignore
 import libzim.suggestion  # SuggestionSearcher  # pyright: ignore
 
-from zimscraperlib.zim._libkiwix import convertTags, parseMimetypeCounter
+from zimscraperlib.zim._libkiwix import CounterMap, convertTags, parseMimetypeCounter
 
 
 class Archive(libzim.reader.Archive):
@@ -101,7 +101,7 @@ def get_search_results_count(self, query: str) -> int:
         return search.getEstimatedMatches()
 
     @property
-    def counters(self) -> dict[str, int]:
+    def counters(self) -> CounterMap:
         try:
             return parseMimetypeCounter(self.get_text_metadata("Counter"))
         except RuntimeError:  # pragma: no cover (no ZIM avail to test itl)

From 1a5db4b83e007d32a68b7cb9325ef28d7381f527 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 09:56:52 +0000
Subject: [PATCH 04/13] Fixed call to make_zim_file in test (type)

---
 tests/zim/test_fs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/zim/test_fs.py b/tests/zim/test_fs.py
index 3eec120c..2ba5cfc1 100644
--- a/tests/zim/test_fs.py
+++ b/tests/zim/test_fs.py
@@ -142,7 +142,7 @@ def test_make_zim_file_no_file_on_error(tmp_path, png_image, build_data):
         illustration="{png_image.name}",
         title="Test ZIM",
         description="A test ZIM",
-        redirects_file="{build_data["redirects_file"]}")
+        redirects_file=pathlib.Path("{build_data["redirects_file"]}"))
 except Exception as exc:
     print(exc)
 finally:

From a5d221297d0f80b48ab1800417e661ece58f7076 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 09:58:53 +0000
Subject: [PATCH 05/13] Introducing Callback via new typing module

- Explicit callback definition
- simplified delete_callback to be a dumb callback (not chaining)
---
 src/zimscraperlib/filesystem.py     | 14 ++----------
 src/zimscraperlib/typing.py         | 26 ++++++++++++++++++++++
 src/zimscraperlib/zim/creator.py    | 34 +++++++++++++++++------------
 tests/filesystem/test_filesystem.py | 20 +----------------
 4 files changed, 49 insertions(+), 45 deletions(-)
 create mode 100644 src/zimscraperlib/typing.py

diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py
index 65e0a2c7..6ffbe6a1 100644
--- a/src/zimscraperlib/filesystem.py
+++ b/src/zimscraperlib/filesystem.py
@@ -9,8 +9,6 @@
 
 import os
 import pathlib
-from collections.abc import Callable
-from typing import Any
 
 import magic
 
@@ -44,15 +42,7 @@ def get_content_mimetype(content: bytes | str) -> str:
     return MIME_OVERRIDES.get(detected_mime, detected_mime)
 
 
-def delete_callback(
-    fpath: str | pathlib.Path,
-    callback: Callable | None = None,
-    *callback_args: Any,
-):
-    """helper deleting passed filepath, optionnaly calling an additional callback"""
+def delete_callback(fpath: str | pathlib.Path):
+    """helper deleting passed filepath"""
 
     os.unlink(fpath)
-
-    # call the callback if requested
-    if callback and callable(callback):
-        callback.__call__(*callback_args)
diff --git a/src/zimscraperlib/typing.py b/src/zimscraperlib/typing.py
new file mode 100644
index 00000000..237f1ae7
--- /dev/null
+++ b/src/zimscraperlib/typing.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import Any, NamedTuple
+
+
+class Callback(NamedTuple):
+    func: Callable
+    args: tuple[Any, ...] | None = None
+    kwargs: dict[str, Any] | None = None
+
+    @property
+    def callable(self) -> bool:
+        return callable(self.func)
+
+    def get_args(self) -> tuple[Any, ...]:
+        return self.args or ()
+
+    def get_kwargs(self) -> dict[str, Any]:
+        return self.kwargs or {}
+
+    def call_with(self, *args, **kwargs):
+        self.func.__call__(*args, **kwargs)
+
+    def call(self):
+        self.call_with(*self.get_args(), **self.get_kwargs())
diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
index e0046c87..781259b3 100644
--- a/src/zimscraperlib/zim/creator.py
+++ b/src/zimscraperlib/zim/creator.py
@@ -40,6 +40,7 @@
 )
 from zimscraperlib.i18n import is_valid_iso_639_3
 from zimscraperlib.types import get_mime_for_name
+from zimscraperlib.typing import Callback
 from zimscraperlib.zim.indexing import IndexData
 from zimscraperlib.zim.items import StaticItem
 from zimscraperlib.zim.metadata import (
@@ -341,7 +342,7 @@ def add_item_for(
         should_compress: bool | None = None,
         delete_fpath: bool | None = False,
         duplicate_ok: bool | None = None,
-        callback: Callable | tuple[Callable, Any] | None = None,
+        callbacks: list[Callback] | Callback | None = None,
         index_data: IndexData | None = None,
         auto_index: bool = True,
     ):
@@ -365,6 +366,11 @@ def add_item_for(
         if fpath is None and content is None:
             raise ValueError("One of fpath or content is required")
 
+        if isinstance(callbacks, Callback):
+            callbacks = [callbacks]
+        elif callbacks is None:
+            callbacks = []
+
         mimetype = mimetype_for(
             path=path, content=content, fpath=fpath, mimetype=mimetype
         )
@@ -377,12 +383,7 @@ def add_item_for(
             hints[libzim.writer.Hint.COMPRESS] = should_compress
 
         if delete_fpath and fpath:
-            cb = [delete_callback, fpath]
-            if callback and callable(callback):
-                cb.append(callback)
-            elif callback:
-                cb += list(callback)
-            callback = tuple(cb)
+            callbacks.append(Callback(func=delete_callback, args=(fpath,)))
 
         self.add_item(
             StaticItem(
@@ -395,7 +396,7 @@ def add_item_for(
                 index_data=index_data,
                 auto_index=auto_index,
             ),
-            callback=callback,
+            callbacks=callbacks,
             duplicate_ok=duplicate_ok,
         )
         return path
@@ -404,18 +405,23 @@ def add_item(
         self,
         item: libzim.writer.Item,
         duplicate_ok: bool | None = None,
-        callback: Callable | tuple[Callable, Any] | None = None,
+        callbacks: list[Callback] | Callback | None = None,
     ):
         """Add a libzim.writer.Item
 
         callback: either a single callable or a tuple containing the callable
         as first element then the arguments to pass to the callable.
         Note: you must __not__ include the item itself in those arguments."""
-        if callback:
-            if callable(callback):
-                weakref.finalize(item, callback)
-            else:
-                weakref.finalize(item, *callback)
+        if isinstance(callbacks, Callback):
+            callbacks = [callbacks]
+        elif callbacks is None:
+            callbacks = []
+
+        for callback in callbacks:
+            if callback.callable:
+                weakref.finalize(
+                    item, callback.func, *callback.get_args(), **callback.get_kwargs()
+                )
 
         duplicate_ok = duplicate_ok or self.ignore_duplicates
         try:
diff --git a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py
index 08579c16..daf2bec8 100644
--- a/tests/filesystem/test_filesystem.py
+++ b/tests/filesystem/test_filesystem.py
@@ -43,25 +43,7 @@ def test_mime_overrides(svg_image):
             assert get_content_mimetype(fh.read(64)) == expected_mime
 
 
-def test_delete_callback_with_cb(tmp_path):
-    class Store:
-        called = 0
-
-    def cb(*args):  # noqa: ARG001
-        Store.called += 1
-
-    fpath = tmp_path.joinpath("my-file")
-    with open(fpath, "w") as fh:
-        fh.write("content")
-
-    delete_callback(fpath, cb, fpath.name)
-
-    assert not fpath.exists()
-    assert Store.called
-    assert Store.called == 1
-
-
-def test_delete_callback_without_cb(tmp_path):
+def test_delete_callback(tmp_path):
     fpath = tmp_path.joinpath("my-file")
     with open(fpath, "w") as fh:
         fh.write("content")

From d44fa1e891bf71c85624e6905303ca62b925fc56 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 10:00:39 +0000
Subject: [PATCH 06/13] Revamped metadata module around single base Metadata

---
 src/zimscraperlib/zim/creator.py    |  23 +-
 src/zimscraperlib/zim/filesystem.py |   8 +-
 src/zimscraperlib/zim/metadata.py   | 719 +++++++++++++++-------------
 tests/zim/conftest.py               |   4 +-
 tests/zim/test_metadata.py          | 362 +++++++-------
 tests/zim/test_zim_creator.py       | 106 ++--
 6 files changed, 647 insertions(+), 575 deletions(-)

diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
index 781259b3..318abeb9 100644
--- a/src/zimscraperlib/zim/creator.py
+++ b/src/zimscraperlib/zim/creator.py
@@ -24,13 +24,10 @@
 import pathlib
 import re
 import weakref
-from collections.abc import Callable
-from typing import Any
 
 import libzim.writer  # pyright: ignore
 import PIL.Image
 
-import zimscraperlib.zim.metadata
 from zimscraperlib import logger
 from zimscraperlib.constants import FRONT_ARTICLE_MIMETYPES
 from zimscraperlib.filesystem import (
@@ -102,8 +99,8 @@ class Creator(libzim.writer.Creator):
     Use workaround_nocancel=False to disable the workaround.
 
     By default, all metadata are validated for compliance with openZIM specification and
-    conventions. Set check_metadata_conventions=True to disable this validation (you can
-    still do checks manually with the validation methods or your own logic).
+    conventions. Set metdata.APPLY_RECOMMENDATIONS to False to disable this validation
+    # (you canstill do checks manually with the validation methods or your own logic).
     """
 
     def __init__(
@@ -114,7 +111,6 @@ def __init__(
         *,
         workaround_nocancel: bool | None = True,
         ignore_duplicates: bool | None = False,
-        check_metadata_conventions: bool = True,
     ):
         super().__init__(filename=filename)
         self._metadata: dict[str, Metadata] = {}
@@ -132,9 +128,6 @@ def __init__(
 
         self.workaround_nocancel = workaround_nocancel
         self.ignore_duplicates = ignore_duplicates
-        zimscraperlib.zim.metadata.check_metadata_conventions = (
-            check_metadata_conventions
-        )
 
     def config_indexing(
         self, indexing: bool, language: str | None = None  # noqa: FBT001
@@ -211,7 +204,7 @@ def _get_first_language_metadata_value(self) -> str | None:
         """Private methods to get most popular lang code from Language metadata"""
         for metadata in self._metadata.values():
             if isinstance(metadata, LanguageMetadata):
-                return metadata.libzim_value.decode().split(",")[0]
+                return metadata.value[0]
         return None
 
     def start(self):
@@ -244,18 +237,14 @@ def start(self):
 
         for metadata in self._metadata.values():
             if isinstance(metadata, IllustrationMetadata):
-                self.add_illustration(metadata.size, metadata.libzim_value)
+                self.add_illustration(metadata.illustration_size, metadata.libzim_value)
             else:
                 self.add_metadata(metadata)
         self._metadata.clear()
 
         return self
 
-    def add_metadata(
-        self,
-        value: Metadata,
-        mimetype: str = "text/plain;charset=UTF-8",
-    ):
+    def add_metadata(self, value: Metadata):
         """Really add the metadata to the ZIM, after ZIM creation has started.
 
         You would probably prefer to use config_metadata methods to check metadata
@@ -263,7 +252,7 @@ def add_metadata(
         duplicate metadata name.
         """
 
-        super().add_metadata(value.name, value.libzim_value, mimetype)
+        super().add_metadata(value.name, value.libzim_value, value.mimetype)
 
     def config_metadata(
         self,
diff --git a/src/zimscraperlib/zim/filesystem.py b/src/zimscraperlib/zim/filesystem.py
index 6cbc73e3..baa39c90 100644
--- a/src/zimscraperlib/zim/filesystem.py
+++ b/src/zimscraperlib/zim/filesystem.py
@@ -166,11 +166,13 @@ def make_zim_file(
     with open(illustration_path, "rb") as fh:
         illustration_data = fh.read()
 
+    # disable recommendations if requested
+    metadata.APPLY_RECOMMENDATIONS = not disable_metadata_checks
+
     zim_file = Creator(
         filename=fpath,
         main_path=main_page,
         ignore_duplicates=ignore_duplicates,
-        check_metadata_conventions=disable_metadata_checks,
     ).config_metadata(
         metadata.StandardMetadataList(
             # mandatory
@@ -181,8 +183,8 @@ def make_zim_file(
             Language=metadata.LanguageMetadata(language),
             Creator=metadata.CreatorMetadata(creator),
             Publisher=metadata.PublisherMetadata(publisher),
-            Illustration_48x48_at_1=metadata.IllustrationMetadata(
-                "Illustration_48x48@1", illustration_data
+            Illustration_48x48_at_1=metadata.DefaultIllustrationMetadata(
+                illustration_data
             ),
             # optional
             Tags=metadata.TagsMetadata(list(tags)) if tags else None,
diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
index 0fe4f508..6fc5c439 100644
--- a/src/zimscraperlib/zim/metadata.py
+++ b/src/zimscraperlib/zim/metadata.py
@@ -3,8 +3,10 @@
 import base64
 import datetime
 import io
-import re
-from dataclasses import dataclass
+from collections.abc import Iterable
+from dataclasses import asdict, dataclass, fields
+from itertools import filterfalse
+from typing import Any, BinaryIO
 
 import regex
 
@@ -21,9 +23,8 @@
 # All control characters are disallowed in str metadata except \n, \r and \t
 UNWANTED_CONTROL_CHARACTERS_REGEX = regex.compile(r"(?![\n\t\r])\p{C}")
 
-# flag to enable / disable check conventions (e.g. title shorter than 30 chars,
-# description shorter than 80 chars, ...)
-check_metadata_conventions: bool = True
+# whether to apply openZIM recommendations (see https://wiki.openzim.org/wiki/Metadata)
+APPLY_RECOMMENDATIONS: bool = True
 
 
 def clean_str(value: str) -> str:
@@ -36,391 +37,419 @@ def nb_grapheme_for(value: str) -> int:
     return len(regex.findall(r"\X", value))
 
 
-class Metadata:
-    """A very basic metadata, with a name and a bytes or io.BytesIO value
+def mandatory(cls):
+    """Marks a Metadata mandatory: must be set to please Creator and cannot be empty"""
+    cls.is_required = True
+    cls.empty_allowed = False
+    return cls
 
-    Compliance with ZIM specification is done at initialisation, value passed at
-    initialization is kept in memory in `value` attribute, target name is stored in
-    `name` value and conversion to libzim value is available with `libzim_value`
-    property.
-    """
 
-    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
-        if not isinstance(value, bytes | io.BytesIO):
-            raise ValueError(
-                f"Unexpected type passed to {self.__class__.__qualname__}: "
-                f"{value.__class__.__qualname__}"
-            )
-        if name == "Counter":
-            raise ValueError("Counter cannot be set. libzim sets it.")
-        self.name = name
-        self.value = value
-        libzim_value = self.libzim_value  # to check for errors
-        if libzim_value is None or len(libzim_value) == 0:
-            raise ValueError("Cannot set empty metadata")
+def expecting_text(cls):
+    """Expects a Text (str) input. Will be cleaned-up and UTF-8 encoded"""
+    cls.expecting = "text"
+    cls.require_text_cleanup = True
+    cls.require_utf8_encoding = True
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        if isinstance(self.value, io.BytesIO):
-            return self.value.getvalue()
-        return self.value
+    # native type is str
+    def get_cleaned_value(self, value: str) -> str:
+        if self.require_text_cleanup:
+            value = clean_str(value)
 
+        if not self.empty_allowed and not value.strip():
+            raise ValueError("Missing value (empty not allowed)")
 
-class _TextMetadata(Metadata):
-    """A basic metadata whose value is expected to be some text"""
+        # max-length is openZIM recommendation
+        if getattr(self, "oz_max_length", 0) and APPLY_RECOMMENDATIONS:
+            if nb_grapheme_for(value) > self.oz_max_length:
+                raise ValueError(
+                    f"{self.name} value is too long ({self.oz_max_length})"
+                )
+        return value
 
-    def __init__(self, name: str, value: bytes | io.BytesIO | str) -> None:
-        if not isinstance(value, bytes | io.BytesIO | str):
-            raise ValueError(
-                f"Unexpected type passed to {self.__class__.__qualname__}: "
-                f"{value.__class__.__qualname__}"
-            )
-        super().__init__(
-            name=name,
-            value=(
-                value.encode()
-                if isinstance(value, str)
-                else value.getvalue() if isinstance(value, io.BytesIO) else value
-            ),
-        )
-        self.value = value
-        _ = self.libzim_value  # to check for errors
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.value)
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        # decode and reencode byte types to validate it's proper UTF-8 text
-        if isinstance(self.value, io.BytesIO):
-            str_value = self.value.getvalue().decode()
-        elif isinstance(self.value, bytes):
-            str_value = self.value.decode()
-        else:
-            str_value = self.value
-        return clean_str(str_value).encode()
-
-
-def _check_for_allowed_custom_metadata_name(name: str, class_name: str):
-    """Check that metadata name is not among the standard ones for which a type exist"""
-    if name in DEFAULT_DEV_ZIM_METADATA.__dict__.keys():
-        # this list contains the 'bad' illustration keys, but we don't care, they should
-        # still not be used
-        raise ValueError(
-            f"It is not allowed to use {class_name} for standard {name} "
-            f"metadata. Please use {name}Metadata type for proper checks"
-        )
-    if name.startswith("Illustration_"):
-        raise ValueError(
-            f"It is not allowed to use {class_name} for standard Illustration"
-            "metadata. Please use IllustrationMetadata type for proper checks"
-        )
-
-
-class CustomTextMetadata(_TextMetadata):
-    """A text metadata for which we do little checks"""
-
-    def __init__(self, name: str, value: bytes | io.BytesIO | str) -> None:
-        _check_for_allowed_custom_metadata_name(name, self.__class__.__qualname__)
-        super().__init__(name=name, value=value)
+    cls.get_cleaned_value = get_cleaned_value
+    cls.get_libzim_value = get_libzim_value
+    return cls
 
 
-class CustomMetadata(Metadata):
-    """A bytes metadata for which we do little checks"""
+def expecting_textlist(cls):
+    """Expects a Text List (list[str]) input. Each item will be cleaned-up.
 
-    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
-        _check_for_allowed_custom_metadata_name(name, self.__class__.__qualname__)
-        super().__init__(name=name, value=value)
+    List will be joined (see `join_list_with`) and UTF-8 encoded"""
+    cls.expecting = "textlist"
+    cls.require_textlist_cleanup = True
+    cls.require_utf8_encoding = True
 
+    # native type is list[str]
+    def get_cleaned_value(self, value: Iterable[str] | str) -> list[str]:
+        if isinstance(value, str):
+            value = [value]
+        else:
+            value = list(value)
+        if self.require_textlist_cleanup:
+            value = [clean_str(item) for item in value]
+        if not self.empty_allowed and (not value or not all(value)):
+            raise ValueError("Missing value (empty not allowed)")
+        if not self.duplicates_allowed and len(set(value)) != len(value):
+            raise ValueError("Duplicate entries not allowed")
+        elif self.require_deduplication:
+            value = unique_values(value)
+        if self.oz_only_iso636_3_allowed and APPLY_RECOMMENDATIONS:
+            invalid_codes = list(filterfalse(is_valid_iso_639_3, value))
+            if invalid_codes:
+                raise ValueError(
+                    f"Following code(s) are not ISO-639-3: {','.join(invalid_codes)}"
+                )
+        return value
 
-class _MandatoryTextMetadata(_TextMetadata):
-    """A mandatory (value must be set) text metadata"""
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.join_list_with.join(self.value))
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        value = super().libzim_value
-        if len(value) == 0:
-            raise ValueError("Missing value for mandatory metadata")
-        return value
+    cls.get_cleaned_value = get_cleaned_value
+    cls.get_libzim_value = get_libzim_value
+    return cls
 
 
-class _MandatoryMetadata(Metadata):
-    """A mandatory (value must be set) bytes metadata"""
+def expecting_date(cls):
+    """Expects a Date (date | datetime) input. Will be UTF-8 encoded as YYYY-MM-DD"""
+    cls.expecting = "date"
+    cls.require_utf8_encoding = True
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        value = super().libzim_value
-        if len(value) == 0:
-            raise ValueError("Missing value for mandatory metadata")
+    # native type is date
+    def get_cleaned_value(
+        self, value: datetime.date | datetime.datetime  # noqa: ARG001
+    ) -> datetime.date:
+        if isinstance(value, datetime.datetime):
+            value = value.date()
         return value
 
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.value.strftime("%Y-%m-%d"))
 
-class TitleMetadata(_MandatoryTextMetadata):
-    """The Title metadata"""
+    cls.get_cleaned_value = get_cleaned_value
+    cls.get_libzim_value = get_libzim_value
+    return cls
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__(name="Title", value=value)
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        value = super().libzim_value
-        if check_metadata_conventions:
-            if nb_grapheme_for(value.decode()) > RECOMMENDED_MAX_TITLE_LENGTH:
-                raise ValueError("Title is too long.")
+def expecting_illustration(cls):
+    """Expects a Square PNG Illustration (bytes-like) input.
+
+    PNG format and squareness will be checked"""
+    cls.expecting = "illustration"
+    cls.meta_mimetype = "image/png"
+
+    # native type is PNG image buffer
+    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+        if isinstance(value, BinaryIO):
+            payload = value.read()
+            value.seek(0)
+            value = payload
+        elif isinstance(value, io.BytesIO):
+            value = value.getvalue()
+        if not self.empty_allowed and not value:
+            raise ValueError("Missing value (empty not allowed)")
+
+        if not is_valid_image(
+            image=value,
+            imformat="PNG",
+            size=(self.illustration_size, self.illustration_size),
+        ):
+            raise ValueError(
+                f"{self.name} is not a valid "
+                f"{self.illustration_size}x{self.illustration_size} PNG Image"
+            )
         return value
 
+    def get_libzim_value(self) -> bytes:
+        return self.value
 
-class DescriptionMetadata(_MandatoryTextMetadata):
-    """The Description metadata"""
+    cls.get_cleaned_value = get_cleaned_value
+    cls.get_libzim_value = get_libzim_value
+    return cls
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__(name="Description", value=value)
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        value = super().libzim_value
-        if check_metadata_conventions:
-            if nb_grapheme_for(value.decode()) > MAXIMUM_DESCRIPTION_METADATA_LENGTH:
-                raise ValueError("Description is too long.")
-        return value
+def allow_empty(cls):
+    """Whether input can be blank"""
+    cls.empty_allowed = True
+    return cls
 
 
-class LongDescriptionMetadata(_MandatoryTextMetadata):
-    """The LongDescription metadata"""
+def allow_duplicates(cls):
+    """Whether list input can accept duplicate values"""
+    cls.duplicates_allowed = True
+    return cls
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__(name="LongDescription", value=value)
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        value = super().libzim_value
-        if check_metadata_conventions:
-            if (
-                nb_grapheme_for(value.decode())
-                > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
-            ):
-                raise ValueError("LongDescription is too long.")
-        return value
+def deduplicate(cls):
+    """Whether duplicates in list inputs should be reduced"""
+    cls.duplicates_allowed = True
+    cls.require_deduplication = True
+    return cls
 
 
-class DateMetadata(_TextMetadata):
-    """The Date metadata"""
+def only_lang_codes(cls):
+    """Whether list input should be checked to only accept ISO-639-1 codes"""
+    cls.oz_only_iso636_3_allowed = True
+    return cls
 
-    def __init__(self, value: bytes | str | datetime.date | datetime.datetime) -> None:
-        if not isinstance(value, bytes | str | datetime.date | datetime.datetime):
-            raise ValueError(
-                f"Unexpected type passed to {self.__class__.__qualname__}: "
-                f"{value.__class__.__qualname__}"
-            )
-        super().__init__(
-            name="Date",
-            value=(
-                value
-                if isinstance(value, bytes)
-                else (
-                    value.encode()
-                    if isinstance(value, str)
-                    else value.strftime("%Y-%m-%d").encode()
-                )
-            ),
-        )
-        self.value = value
-        _ = self.libzim_value  # to check for errors
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        if isinstance(self.value, datetime.date | datetime.datetime):
-            return self.value.strftime("%Y-%m-%d").encode()
-        match = re.match(
-            r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})",
-            self.value.decode() if isinstance(self.value, bytes) else self.value,
-        )
-        if not match:
-            raise ValueError("Invalid date format, not matching regex yyyy-mm-dd")
-        try:
-            datetime.date(**{k: int(v) for k, v in match.groupdict().items()})
-        except Exception as exc:
-            raise ValueError(f"Invalid date format: {exc}") from None
-        return self.value if isinstance(self.value, bytes) else self.value.encode()
-
-
-class IllustrationMetadata(_MandatoryMetadata):
-    """Any Illustration_**x**@* metadata"""
-
-    def __init__(self, name: str, value: bytes | io.BytesIO) -> None:
-        super().__init__(name=name, value=value)
-        _ = self.libzim_value  # to check for errors
+def x_protected(cls):
+    """Whether metadata name should be checked for collision with reserved names
 
-    @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        match = ILLUSTRATIONS_METADATA_RE.match(self.name)
-        if not match:
-            raise ValueError("Illustration metadata has improper name")
-        self.size = int(match.groupdict()["height"])
-        if int(match.groupdict()["width"]) != self.size:
-            raise ValueError("Illustration is not squared")
-        if not is_valid_image(
-            image=self.value,
-            imformat="PNG",
-            size=(self.size, self.size),
-        ):
-            raise ValueError(
-                f"{self.name} is not a valid {self.size}x{self.size} PNG Image"
-            )
-        if isinstance(self.value, io.BytesIO):
-            return self.value.getvalue()
-        else:
-            return self.value
+    when applying recommendations"""
+    cls.oz_x_protected = True
+    return cls
 
 
-class LanguageMetadata(_MandatoryTextMetadata):
-    """The Language metadata"""
+def x_prefixed(cls):
+    """Whether metadata names should be automatically X-Prefixed"""
+    cls.oz_x_protected = False
+    cls.oz_x_prefixed = True
+    return cls
 
-    def __init__(self, value: bytes | io.BytesIO | str | list[str] | set[str]) -> None:
-        if not isinstance(value, bytes | io.BytesIO | str | list | set):
-            raise ValueError(
-                f"Unexpected type passed to {self.__class__.__qualname__}: "
-                f"{value.__class__.__qualname__}"
-            )
-        if isinstance(value, list | set) and not all(
-            isinstance(item, str) for item in value
-        ):
-            bad_types = {item.__class__.__qualname__ for item in value} - {"str"}
-            raise ValueError(
-                f"Invalid type(s) found in Iterable: {",".join(bad_types)}"
-            )
-        super().__init__(
-            name="Language",
-            value=",".join(value) if isinstance(value, list | set) else value,
-        )
-        self.value = value
-        _ = self.libzim_value  # to check for errors
+
+class Metadata:
+    # name of the metadata (not its value)
+    meta_name: str
+    value: Any
+
+    # MIME type of the value
+    meta_mimetype: str
+
+    # whether metadata is required or not
+    is_required: bool = False
+
+    # what kind of input data is expected
+    expecting: str = "bytes"  # text | textlist | date | illustration
+
+    # str: text value must be cleaned-up
+    require_text_cleanup: bool = False
+
+    # whether an empty value is allowed or not
+    empty_allowed: bool = False
+
+    # str: whether text value must be under a certain length
+    oz_max_length: int
+
+    # list[str]: text values in list must be cleaned-up
+    require_textlist_cleanup: bool = False
+
+    # list[str]: whether duplicate values are allowed in a list of str
+    duplicates_allowed: bool = False
+    # list[str]: whether duplicate values are automatically removed
+    require_deduplication: bool = False
+
+    # list[str]: whether values in list must all be ISO-636-3 codes
+    oz_only_iso636_3_allowed: bool = False
+
+    join_list_with: str = " "  # , | ;
+
+    illustration_size: int
+    illustration_scale: int = 1
+
+    # str, list[str]: whether text value must be encoded to UTF-8
+    require_utf8_encoding: bool = False
+
+    # wether Name is automatically X-prefixed
+    oz_x_prefixed: bool = False
+    # wether Name is prevented to clash with reserved names
+    oz_x_protected: bool = False
+
+    def __init__(
+        self, value: Any, name: str | None = None, mimetype: str | None = None
+    ) -> None:
+        if name:
+            self.meta_name = name
+        if not getattr(self, "meta_name", ""):
+            raise OSError("Metadata name missing")
+        if mimetype:
+            self.meta_mimetype = mimetype
+        self.value = self.get_cleaned_value(value)
+        self.validate()
+
+    def get_mimetype(self) -> str:
+        # explicitly set first
+        return getattr(self, "meta_mimetype", "text/plain;charset=UTF-8")
 
     @property
-    def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        if isinstance(self.value, bytes | io.BytesIO | str):
-            codes = [
-                clean_str(code) for code in super().libzim_value.decode().split(",")
-            ]
-        else:
-            codes = [clean_str(code) for code in self.value]
-        codes = [code for code in codes if code]  # remove empty values
-        if len(codes) == 0:
-            raise ValueError("Missing value for mandatory Language metadata")
-        if len(set(codes)) != len(codes):
-            raise ValueError("Duplicate codes not allowed in Language metadata")
-        for code in codes:
-            if not is_valid_iso_639_3(code):
-                raise ValueError(f"{code} is not ISO-639-3.")
-        return ",".join(codes).encode()
-
-
-class TagsMetadata(_TextMetadata):
-    """The Tags metadata"""
-
-    def __init__(self, value: bytes | io.BytesIO | str | list[str] | set[str]) -> None:
-        if not isinstance(value, bytes | io.BytesIO | str | list | set):
-            raise ValueError(
-                f"Unexpected type passed to {self.__class__.__qualname__}: "
-                f"{value.__class__.__qualname__}"
+    def mimetype(self) -> str:
+        return self.get_mimetype()
+
+    @staticmethod
+    def matches_reserved_name(name: str) -> bool:
+        if name in [field.name for field in fields(StandardMetadataList)]:
+            return True
+        # set by libzim
+        if name == "Counter":
+            return True
+        return bool(ILLUSTRATIONS_METADATA_RE.match(name))
+
+    def get_name(self) -> str:
+        if self.expecting == "illustration":
+            return (
+                f"Illustration_{self.illustration_size}x{self.illustration_size}"
+                f"@{self.illustration_scale}"
             )
-        if isinstance(value, list | set) and not all(
-            isinstance(item, str) for item in value
+
+        # X- prefix is recommendation for custom
+        if (
+            self.oz_x_protected
+            and APPLY_RECOMMENDATIONS
+            and not self.meta_name.startswith("X-")
+            and self.matches_reserved_name(self.meta_name)
         ):
-            bad_types = {item.__class__.__qualname__ for item in value} - {"str"}
-            raise ValueError(
-                f"Invalid type(s) found in Iterable: {",".join(bad_types)}"
-            )
-        super().__init__(
-            name="Tags",
-            value=";".join(value) if isinstance(value, list | set) else value,
-        )
-        self.value = value
-        _ = self.libzim_value  # to check for errors
+            raise ValueError("Custom metdata name must be X- prefixed")
+        if (
+            self.oz_x_prefixed
+            and APPLY_RECOMMENDATIONS
+            and not self.meta_name.startswith("X-")
+        ):
+            return f"X-{self.meta_name}"
+        return self.meta_name
+
+    @property
+    def name(self) -> str:
+        return self.get_name()
+
+    @staticmethod
+    def get_encoded(value: str) -> bytes:
+        return value.encode()
 
     @property
     def libzim_value(self) -> bytes:
-        """The value to pass to the libzim for creating the metadata"""
-        if isinstance(self.value, bytes | io.BytesIO | str):
-            tags = unique_values(
-                [clean_str(tag) for tag in super().libzim_value.decode().split(";")]
-            )
-        else:
-            tags = unique_values([clean_str(tag) for tag in self.value])
-        tags = [tag for tag in tags if tag]  # remove empty values
-        return ";".join(tags).encode()
+        return self.get_libzim_value()
+
+    # native type is bytes
+    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+        if isinstance(value, BinaryIO):
+            payload = value.read()
+            value.seek(0)
+            value = payload
+        elif isinstance(value, io.BytesIO):
+            value = value.getvalue()
+        if not self.empty_allowed and not value:
+            raise ValueError("Missing value (empty not allowed)")
+        return value
+
+    def get_libzim_value(self) -> bytes:
+        return self.value
+
+    def validate(self) -> None:
+        _ = self.name
+        _ = self.libzim_value
+
+
+@mandatory
+@expecting_text
+class NameMetadata(Metadata):
+    meta_name: str = "Name"
+
 
+@mandatory
+@expecting_textlist
+@only_lang_codes
+class LanguageMetadata(Metadata):
+    meta_name: str = "Language"
+    join_list_with: str = ","
 
-class NameMetadata(_MandatoryTextMetadata):
-    """The Name metadata"""
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Name", value)
+@mandatory
+@expecting_text
+class TitleMetadata(Metadata):
+    meta_name: str = "Title"
+    oz_max_length: int = RECOMMENDED_MAX_TITLE_LENGTH
 
 
-class CreatorMetadata(_MandatoryTextMetadata):
-    """The Creator metadata"""
+@mandatory
+@expecting_text
+class CreatorMetadata(Metadata):
+    meta_name: str = "Creator"
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Creator", value)
 
+@mandatory
+@expecting_text
+class PublisherMetadata(Metadata):
+    meta_name: str = "Publisher"
 
-class PublisherMetadata(_MandatoryTextMetadata):
-    """The Publisher metadata"""
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Publisher", value)
+# TODO
+@mandatory
+@expecting_date
+class DateMetadata(Metadata):
+    meta_name: str = "Date"
 
 
-class ScraperMetadata(_TextMetadata):
-    """The Scraper metadata"""
+@expecting_illustration
+class IllustrationMetadata(Metadata):
+    meta_name = "Illustration_{size}x{size}@{scale}"
+    illustration_size: int
+    illustration_scale: int = 1
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Scraper", value)
+    def __init__(
+        self, value: bytes | BinaryIO | io.BytesIO, size: int, scale: int = 1
+    ) -> None:
+        self.illustration_scale = scale
+        self.illustration_size = size
+        super().__init__(value=value)
 
 
-class FlavourMetadata(_TextMetadata):
-    """The Flavour metadata"""
+@mandatory
+@expecting_illustration
+class DefaultIllustrationMetadata(Metadata):
+    meta_name = "Illustration_48x48@1"
+    illustration_size: int = 48
+    illustration_scale: int = 1
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Flavour", value)
 
+@mandatory
+@expecting_text
+class DescriptionMetadata(Metadata):
+    meta_name: str = "Description"
+    oz_max_length: int = MAXIMUM_DESCRIPTION_METADATA_LENGTH
 
-class SourceMetadata(_TextMetadata):
-    """The Source metadata"""
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Source", value)
+@expecting_text
+class LongDescriptionMetadata(Metadata):
+    meta_name: str = "LongDescription"
+    oz_max_length: int = MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
 
 
-class LicenseMetadata(_TextMetadata):
-    """The License metadata"""
+@deduplicate
+@expecting_textlist
+class TagsMetadata(Metadata):
+    meta_name: str = "Tags"
+    join_list_with: str = ";"
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("License", value)
 
+@expecting_text
+class ScraperMetadata(Metadata):
+    meta_name: str = "Scraper"
 
-class RelationMetadata(_TextMetadata):
-    """The Relation metadata"""
 
-    def __init__(self, value: bytes | io.BytesIO | str) -> None:
-        super().__init__("Relation", value)
+@expecting_text
+class FlavourMetadata(Metadata):
+    meta_name: str = "Flavour"
+
+
+@expecting_text
+class SourceMetadata(Metadata):
+    meta_name: str = "Source"
+
+
+@expecting_text
+class LicenseMetadata(Metadata):
+    meta_name: str = "License"
+
+
+@expecting_text
+class RelationMetadata(Metadata):
+    meta_name: str = "Relation"
 
 
 @dataclass
 class StandardMetadataList:
-    """A class holding all openZIM standard metadata
-
-    Useful to ensure that all mandatory metadata are set, no typo occurs in metadata
-    name and the specification is respected (forbidden duplicate values, ...).
-    """
 
     Name: NameMetadata
     Language: LanguageMetadata
@@ -428,7 +457,7 @@ class StandardMetadataList:
     Creator: CreatorMetadata
     Publisher: PublisherMetadata
     Date: DateMetadata
-    Illustration_48x48_at_1: IllustrationMetadata
+    Illustration_48x48_at_1: DefaultIllustrationMetadata
     Description: DescriptionMetadata
     LongDescription: LongDescriptionMetadata | None = None
     Tags: TagsMetadata | None = None
@@ -439,22 +468,57 @@ class StandardMetadataList:
     Relation: RelationMetadata | None = None
 
     def values(self) -> list[Metadata]:
-        return [v for v in self.__dict__.values() if v]
+        return list(filter(bool, asdict(self).values()))
+
+    @classmethod
+    def get_reserved_names(cls) -> list[str]:
+        """list of mandatory metadata as per the spec.
+
+        computed from metadata using @mandatory decorator"""
+        names = []
+        for field in fields(cls):
+            meta_type = globals().get(str(field.type))
+            if getattr(meta_type, "is_required", False):
+                names.append(getattr(meta_type, "meta_name", ""))
+        return names
+
+
+@x_protected
+class CustomMetadata(Metadata):
+    def __init__(self, name: str, value: bytes | BinaryIO | io.BytesIO) -> None:
+        self.meta_name = name
+        super().__init__(value=value)
+
+
+@x_protected
+@expecting_text
+class CustomTextMetadata(Metadata):
+    def __init__(self, name: str, value: str) -> None:
+        self.meta_name = name
+        super().__init__(name=name, value=value)
+
+
+@x_prefixed
+class XCustomMetadata(CustomMetadata): ...
+
+
+@x_prefixed
+class XCustomTextMetadata(CustomTextMetadata): ...
+
+
+MANDATORY_ZIM_METADATA_KEYS = StandardMetadataList.get_reserved_names()
 
 
-# A sample standard metadata list, to be used for dev purposes when one does not care
-# at all about metadata values ; this ensures all metadata are compliant with the spec
 DEFAULT_DEV_ZIM_METADATA = StandardMetadataList(
     Name=NameMetadata("Test Name"),
     Title=TitleMetadata("Test Title"),
     Creator=CreatorMetadata("Test Creator"),
     Publisher=PublisherMetadata("Test Publisher"),
-    Date=DateMetadata("2023-01-01"),
+    Date=DateMetadata(datetime.date(2023, 1, 1)),
     Description=DescriptionMetadata("Test Description"),
     Language=LanguageMetadata("fra"),
     # blank 48x48 transparent PNG
-    Illustration_48x48_at_1=IllustrationMetadata(
-        "Illustration_48x48@1",
+    Illustration_48x48_at_1=DefaultIllustrationMetadata(
         base64.b64decode(
             "iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
             "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
@@ -462,10 +526,3 @@ def values(self) -> list[Metadata]:
         ),
     ),
 )
-
-# list of mandatory metadata of the zim file, automatically computed
-MANDATORY_ZIM_METADATA_KEYS = [
-    metadata.name
-    for metadata in DEFAULT_DEV_ZIM_METADATA.__dict__.values()
-    if isinstance(metadata, _MandatoryTextMetadata | _MandatoryMetadata)
-]
diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py
index eeaf4544..c2ff66ae 100644
--- a/tests/zim/conftest.py
+++ b/tests/zim/conftest.py
@@ -107,6 +107,6 @@ def counters():
 
 @pytest.fixture
 def ignore_metadata_conventions():
-    zimscraperlib.zim.metadata.check_metadata_conventions = False
+    zimscraperlib.zim.metadata.APPLY_RECOMMENDATIONS = False
     yield
-    zimscraperlib.zim.metadata.check_metadata_conventions = True
+    zimscraperlib.zim.metadata.APPLY_RECOMMENDATIONS = True
diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py
index 24cdc8e3..d202e303 100644
--- a/tests/zim/test_metadata.py
+++ b/tests/zim/test_metadata.py
@@ -9,6 +9,7 @@
 from typing import NamedTuple
 
 import pytest
+from beartype.roar import BeartypeCallHintParamViolation as InvalidType
 
 from zimscraperlib.zim import metadata
 
@@ -19,13 +20,13 @@
         ("fra"),
         ("eng"),
         ("bam"),
-        ("fra,eng"),
-        ("eng, fra"),
-        ("fra,eng,bam"),
         (["fra", "eng"]),
-        ("fra, eng"),  # codes are automatically cleaned-up
-        ("eng,  \r"),  # codes are automatically cleaned-up
-        (["fra", "  "]),  # codes are automatically cleaned-up
+        ("eng", "fra"),
+        ("fra", "eng", "bam"),
+        (["fra", "eng"]),
+        ("fra", "eng"),  # codes are automatically cleaned-up
+        ("eng\r"),  # codes are automatically cleaned-up
+        (["fra "]),  # codes are automatically cleaned-up
     ],
 )
 def test_validate_language_valid(value: list[str] | str):
@@ -33,27 +34,28 @@ def test_validate_language_valid(value: list[str] | str):
 
 
 @pytest.mark.parametrize(
-    "value,error",
+    "value,exception,error",
     [
-        ("", "Missing value for mandatory metadata"),
-        ("fr", "is not ISO-639-3"),
-        ("en", "is not ISO-639-3"),
-        ("xxx", "is not ISO-639-3"),
-        ("rmr", "is not ISO-639-3"),
-        ("fra,en,bam", "is not ISO-639-3"),
-        ("fra;eng", "is not ISO-639-3"),
-        ("fra,fra", "Duplicate codes not allowed in Language metadata"),
-        (["fra", "fra"], "Duplicate codes not allowed in Language metadata"),
-        ([], "Missing value for mandatory metadata"),
-        (["  ", "\t"], "Missing value for mandatory Language metadata"),
-        ("  , ", "Missing value for mandatory Language metadata"),
-        (["fra", 1], "Invalid type(s) found in Iterable:"),
-        (["fra", b"1"], "Invalid type(s) found in Iterable:"),
-        (["fra", 1, b"1"], "Invalid type(s) found in Iterable:"),
+        ("", ValueError, "Missing value (empty not allowed)"),
+        ("fr", ValueError, "are not ISO-639-3"),
+        ("en", ValueError, "are not ISO-639-3"),
+        ("xxx", ValueError, "are not ISO-639-3"),
+        ("rmr", ValueError, "are not ISO-639-3"),
+        ("fra,en,bam", ValueError, "are not ISO-639-3"),
+        (("fra", "en", "bam"), ValueError, "are not ISO-639-3"),
+        ("fra;eng", ValueError, "are not ISO-639-3"),
+        (("fra", "fra"), ValueError, "Duplicate entries not allowed"),
+        (["fra", "fra"], ValueError, "Duplicate entries not allowed"),
+        ([], ValueError, "Missing value (empty not allowed)"),
+        (["  ", "\t"], ValueError, "Missing value (empty not allowed)"),
+        (("  ", " "), ValueError, "Missing value (empty not allowed)"),
+        (["fra", 1], InvalidType, " violates type"),
+        (["fra", b"1"], InvalidType, " violates type"),
+        (["fra", 1, b"1"], InvalidType, " violates type"),
     ],
 )
-def test_validate_language_invalid(value: list[str] | str, error: str):
-    with pytest.raises(ValueError, match=re.escape(error)):
+def test_validate_language_invalid(value: list[str] | str, exception: type, error: str):
+    with pytest.raises(exception, match=re.escape(error)):
         assert metadata.LanguageMetadata(value)
 
 
@@ -84,26 +86,31 @@ def test_validate_tags_valid(tags, is_valid):
 
 
 @pytest.mark.parametrize(
-    "value, error",
+    "value, exception, error",
     [
-        pytest.param("", "Cannot set empty metadata", id="empty_string"),
-        pytest.param(1, "Unexpected type passed to TagsMetadata: int", id="one_int"),
         pytest.param(
-            None, "Unexpected type passed to TagsMetadata: NoneType", id="none_value"
+            "", ValueError, "Missing value (empty not allowed)", id="empty_string"
         ),
+        pytest.param(1, InvalidType, " violates type", id="one_int"),
         pytest.param(
-            ["wikipedia", 1], "Invalid type(s) found in Iterable: int", id="int_in_list"
+            None,
+            InvalidType,
+            " violates type",
+            id="none_value",
         ),
+        pytest.param(["wikipedia", 1], InvalidType, " violates type", id="int_in_list"),
     ],
 )
-def test_validate_tags_invalid(value: list[str] | str, error: str):
-    with pytest.raises(ValueError, match=re.escape(error)):
+def test_validate_tags_invalid(
+    value: list[str] | str | int, exception: type, error: str
+):
+    with pytest.raises(exception, match=re.escape(error)):
         metadata.TagsMetadata(value)
 
 
 def test_validate_dedup_tags():
     assert (
-        metadata.TagsMetadata("  wikipedia  \t   ;   wikipedia").libzim_value
+        metadata.TagsMetadata(["  wikipedia  \t   ", "   wikipedia"]).libzim_value
         == b"wikipedia"
     )
 
@@ -117,7 +124,7 @@ def test_validate_short_grapheme_title_check_enabled():
 
 
 def test_validate_too_long_title_check_enabled():
-    with pytest.raises(ValueError, match="Title is too long"):
+    with pytest.raises(ValueError, match="Title value is too long"):
         assert metadata.TitleMetadata("T" * 31)
 
 
@@ -136,7 +143,7 @@ def test_validate_short_grapheme_description_check_enabled():
 
 
 def test_validate_too_long_description_check_enabled():
-    with pytest.raises(ValueError, match="Description is too long"):
+    with pytest.raises(ValueError, match="Description value is too long"):
         assert metadata.DescriptionMetadata("T" * 81)
 
 
@@ -155,7 +162,7 @@ def test_validate_short_grapheme_longdescription_check_enabled():
 
 
 def test_validate_too_long_longdescription_check_enabled():
-    with pytest.raises(ValueError, match="Description is too long"):
+    with pytest.raises(ValueError, match="LongDescription value is too long"):
         assert metadata.LongDescriptionMetadata("T" * 4001)
 
 
@@ -175,20 +182,23 @@ def test_validate_date_datetime_datetime():
     )
 
 
-@pytest.mark.parametrize("value", [("9999-99-99"), ("2023/02/29"), ("1969-13-31")])
-def test_validate_date_invalid_date(value):
-    with pytest.raises(ValueError, match="Invalid date format"):
-        metadata.DateMetadata(value)
+def test_validate_date_invalid_datee():
+    assert metadata.DateMetadata(
+        datetime.datetime(9, 12, 11, 0, 0, 0, tzinfo=datetime.UTC)
+    )
 
 
-def test_validate_illustration_invalid_name():
-    with pytest.raises(ValueError, match="Illustration metadata has improper name"):
-        metadata.IllustrationMetadata("Illustration_48x48_at_1", b"")
+@pytest.mark.parametrize("value", [("9999-99-99"), ("2023/02/29"), ("1969-13-31")])
+def test_validate_date_invalid_datetype(value):
+    with pytest.raises(InvalidType, match="violates type hint"):
+        metadata.DateMetadata(value)
 
 
-def test_validate_illustration_not_squared():
-    with pytest.raises(ValueError, match="Illustration is not squared"):
-        metadata.IllustrationMetadata("Illustration_48x96@1", b"")
+def test_validate_illustration_invalid_image():
+    with pytest.raises(
+        ValueError, match="Illustration_48x48@1 is not a valid 48x48 PNG Image"
+    ):
+        metadata.IllustrationMetadata(b"PN", size=48)
 
 
 def test_validate_illustration_wrong_sizes(png_image2):
@@ -197,12 +207,12 @@ def test_validate_illustration_wrong_sizes(png_image2):
     with pytest.raises(
         ValueError, match="Illustration_48x48@1 is not a valid 48x48 PNG Image"
     ):
-        metadata.IllustrationMetadata("Illustration_48x48@1", png_data)
+        metadata.IllustrationMetadata(png_data, size=48)
 
 
 def test_blank_metadata():
-    with pytest.raises(ValueError, match="Cannot set empty metadata"):
-        metadata.Metadata("Blank", b"")
+    with pytest.raises(ValueError, match=r"Missing value \(empty not allowed\)"):
+        metadata.Metadata(name="Blank", value=b"")
 
 
 class MetadataInitConfig(NamedTuple):
@@ -213,15 +223,15 @@ class MetadataInitConfig(NamedTuple):
 @pytest.fixture(
     params=[
         MetadataInitConfig(metadata.Metadata, 2),
-        MetadataInitConfig(metadata._TextMetadata, 2),
         MetadataInitConfig(metadata.CustomTextMetadata, 2),
         MetadataInitConfig(metadata.CustomMetadata, 2),
-        MetadataInitConfig(metadata._MandatoryTextMetadata, 2),
-        MetadataInitConfig(metadata._MandatoryMetadata, 2),
+        MetadataInitConfig(metadata.XCustomMetadata, 2),
+        MetadataInitConfig(metadata.XCustomTextMetadata, 2),
         MetadataInitConfig(metadata.TitleMetadata, 1),
         MetadataInitConfig(metadata.DescriptionMetadata, 1),
         MetadataInitConfig(metadata.LongDescriptionMetadata, 1),
         MetadataInitConfig(metadata.DateMetadata, 1),
+        MetadataInitConfig(metadata.DefaultIllustrationMetadata, 1),
         MetadataInitConfig(metadata.IllustrationMetadata, 2),
         MetadataInitConfig(metadata.LanguageMetadata, 1),
         MetadataInitConfig(metadata.TagsMetadata, 1),
@@ -239,35 +249,6 @@ def metadata_init(request: pytest.FixtureRequest) -> MetadataInitConfig:
     return request.param
 
 
-def test_none_metadata_two_args(metadata_init: MetadataInitConfig):
-    with pytest.raises(
-        ValueError,
-        match=f"Unexpected type passed to {metadata_init.a_type.__qualname__}: "
-        "NoneType",
-    ):
-        if metadata_init.nb_args == 2:
-            metadata_init.a_type(
-                "Foo",
-                None,  # pyright: ignore[reportArgumentType]
-            )
-        elif metadata_init.nb_args == 1:
-            metadata_init.a_type(None)
-
-
-@pytest.mark.parametrize(
-    "a_type",
-    [
-        (metadata.Metadata),
-        (metadata.CustomTextMetadata),
-        (metadata._TextMetadata),
-        (metadata._MandatoryMetadata),
-    ],
-)
-def test_reserved_counter_metadata(a_type: type):
-    with pytest.raises(ValueError, match="Counter cannot be set. libzim sets it."):
-        a_type("Counter", b"Foo")
-
-
 @pytest.mark.parametrize(
     "metadata_name",
     [
@@ -287,6 +268,7 @@ def test_reserved_counter_metadata(a_type: type):
         ("Source"),
         ("License"),
         ("Relation"),
+        ("Counter"),
     ],
 )
 @pytest.mark.parametrize(
@@ -297,119 +279,168 @@ def test_reserved_counter_metadata(a_type: type):
     ],
 )
 def test_reserved_names(metadata_name: str, metadata_type: type):
-    with pytest.raises(ValueError, match="It is not allowed to use"):
-        metadata_type(metadata_name, b"a value")
+    with pytest.raises(ValueError, match="Custom metdata name must be X- prefixed"):
+        value = "x" if metadata_type == metadata.CustomTextMetadata else b"x"
+        metadata_type(name=metadata_name, value=value)
 
 
-def test_mandatory_value(metadata_init: MetadataInitConfig):
+@pytest.mark.parametrize(
+    "metadata_name",
+    [
+        ("Nameo"),
+        ("Languageo"),
+        ("Titleo"),
+        ("Creatoro"),
+        ("Publishero"),
+        ("Dateo"),
+        ("Illustration_48x48@1o"),
+        ("Illustration_96x96@1o"),
+        ("Descriptiono"),
+        ("LongDescriptiono"),
+        ("Tagso"),
+        ("Scrapero"),
+        ("Flavouro"),
+        ("Sourceo"),
+        ("Licenseo"),
+        ("Relationo"),
+        ("Countero"),
+    ],
+)
+@pytest.mark.parametrize(
+    "metadata_type",
+    [
+        (metadata.CustomMetadata),
+        (metadata.CustomTextMetadata),
+    ],
+)
+def test_nonreserved_custom(metadata_name: str, metadata_type: type):
+    value = "x" if metadata_type == metadata.CustomTextMetadata else b"x"
+    assert metadata_type(name=metadata_name, value=value)
 
-    with pytest.raises(
-        ValueError,
-        match=(
-            "is not a valid 48x48 PNG Image"
-            if metadata_init.a_type == metadata.IllustrationMetadata
-            else (
-                "Invalid date format, not matching regex"
-                if metadata_init.a_type == metadata.DateMetadata
-                else "Cannot set empty metadata|Missing value for mandatory metadata"
-            )
-        ),
-    ):
-        if metadata_init.nb_args == 2:
-            metadata_init.a_type(
-                (
-                    "Foo"
-                    if metadata_init.a_type != metadata.IllustrationMetadata
-                    else "Illustration_48x48@1"
-                ),
-                b"",
-            )
-        elif metadata_init.nb_args == 1:
-            metadata_init.a_type(b"")
+
+def test_mandatory_value(metadata_init: MetadataInitConfig):
+    if not metadata_init.a_type.is_required:
+        pytest.skip("Only testing mandatory ones")
+    with pytest.raises(ValueError, match="Missing"):
+        if metadata_init.a_type.expecting == "textlist":
+            metadata_init.a_type([])
+            metadata_init.a_type(["", " "])
+        elif metadata_init.a_type.expecting == "text":
+            metadata_init.a_type("")
+            metadata_init.a_type(" ")
+        elif metadata_init.a_type.expecting == "date":
+            pytest.skip("Cannot set an empty Date")
+        elif metadata_init.a_type.expecting in ("illustration", "bytes"):
+            if metadata_init.nb_args == 1:
+                metadata_init.a_type(b"")
+                metadata_init.a_type(b" ")
+            else:
+                metadata_init.a_type(b"", name="Foo")
+                metadata_init.a_type(b" ", name="Foo")
 
 
 def test_clean_value(metadata_init: MetadataInitConfig):
-    raw_value = b"\t\n\r\n \tA val \t\awith  \bcontrol chars\v\n"
+    raw_value = "\t\n\r\n \tA val \t\awith  \bcontrol chars\v\n"
     clean_value = b"A val \twith  control chars"
-    if metadata_init.a_type in (
-        # some types do not support the raw value
-        metadata.IllustrationMetadata,
-        metadata.LanguageMetadata,
-        metadata.DateMetadata,
-        # and binary types are not cleaned-up
-        metadata.Metadata,
-        metadata.CustomMetadata,
-        metadata._MandatoryMetadata,
-    ):
-        pytest.skip("unsupported configuration")  # ignore these test cases
-    if metadata_init.nb_args == 2:
-        assert metadata_init.a_type(("Foo"), raw_value).libzim_value == clean_value
-    elif metadata_init.nb_args == 1:
+    raw_lang_value = "\t\n\r\n \tfra \t\a\b\v\n"
+    clean_lang_value = b"fra"
+    if metadata_init.a_type == metadata.LanguageMetadata:
+        assert metadata_init.a_type([raw_lang_value]).libzim_value == clean_lang_value
+    elif metadata_init.a_type.expecting == "textlist":
         assert metadata_init.a_type(raw_value).libzim_value == clean_value
+        assert metadata_init.a_type(
+            value=[raw_value, raw_value]
+        ).libzim_value == metadata_init.a_type.join_list_with.join(
+            [clean_value.decode("UTF-8")]
+        ).encode(
+            "UTF-8"
+        )
+    elif metadata_init.a_type.expecting == "text":
+        if metadata_init.nb_args == 1:
+            assert metadata_init.a_type(value=raw_value).libzim_value == clean_value
+        else:
+            assert (
+                metadata_init.a_type(value=raw_value, name="Foo").libzim_value
+                == clean_value
+            )
 
 
-# @pytest.mark.parametrize(
-#     "metadata, expected_value",
-#     [
-#         (metadata.Metadata("Foo", b"a value"), b"a value"),
-#         (metadata.Metadata("Foo", io.BytesIO(b"a value")), b"a value"),
-#         (metadata.DescriptionMetadata(io.BytesIO(b"a value")), b"a value"),
-#         (metadata.DescriptionMetadata(b"a value"), b"a value"),
-#         (metadata.DescriptionMetadata("a value"), b"a value"),
-#         (metadata.IllustrationMetadata(png), b"a value"),
-#     ],
-# )
 def test_libzim_bytes_value(metadata_init: MetadataInitConfig, png_image: pathlib.Path):
     if metadata_init.nb_args == 2:
         if metadata_init.a_type == metadata.IllustrationMetadata:
             with open(png_image, "rb") as fh:
                 png_data = fh.read()
+            assert metadata_init.a_type(png_data, 48).libzim_value == png_data
+        elif metadata_init.a_type in (
+            metadata.CustomTextMetadata,
+            metadata.XCustomTextMetadata,
+        ):
             assert (
-                metadata.IllustrationMetadata(
-                    "Illustration_48x48@1", png_data
-                ).libzim_value
-                == png_data
+                metadata_init.a_type(value="a value", name="Foo").libzim_value
+                == b"a value"
+            )
+        elif metadata_init.a_type in (
+            metadata.CustomMetadata,
+            metadata.XCustomMetadata,
+        ):
+            assert (
+                metadata_init.a_type(value=b"a value", name="Foo").libzim_value
+                == b"a value"
             )
         else:
-            assert metadata_init.a_type("Foo", b"a value").libzim_value == b"a value"
+            assert (
+                metadata_init.a_type(value=b"a value", name="Foo").libzim_value
+                == b"a value"
+            )
     elif metadata_init.nb_args == 1:
-        if metadata_init.a_type == metadata.DateMetadata:
-            assert metadata_init.a_type(b"2023-12-11").libzim_value == b"2023-12-11"
+        if metadata_init.a_type == metadata.DefaultIllustrationMetadata:
+            with open(png_image, "rb") as fh:
+                png_data = fh.read()
+            assert metadata_init.a_type(png_data).libzim_value == png_data
+        elif metadata_init.a_type == metadata.DateMetadata:
+            assert (
+                metadata_init.a_type(datetime.date(2023, 12, 11)).libzim_value
+                == b"2023-12-11"
+            )
         elif metadata_init.a_type == metadata.LanguageMetadata:
-            assert metadata_init.a_type(b"fra,eng").libzim_value == b"fra,eng"
+            assert metadata_init.a_type(["fra", "eng"]).libzim_value == b"fra,eng"
         else:
-            assert metadata_init.a_type(b"a value").libzim_value == b"a value"
+            assert metadata_init.a_type("a value").libzim_value == b"a value"
 
 
 def test_libzim_io_bytesio_value(
     metadata_init: MetadataInitConfig, png_image: pathlib.Path
 ):
-    if metadata_init.nb_args == 2:
-        if metadata_init.a_type == metadata.IllustrationMetadata:
-            with open(png_image, "rb") as fh:
-                png_data = fh.read()
-            assert (
-                metadata.IllustrationMetadata(
-                    "Illustration_48x48@1", io.BytesIO(png_data)
-                ).libzim_value
-                == png_data
-            )
-        else:
+    if metadata_init.a_type.expecting == "illustration":
+        with open(png_image, "rb") as fh:
+            png_data = fh.read()
+            if metadata_init.nb_args == 1:
+                assert (
+                    metadata_init.a_type(value=io.BytesIO(png_data)).libzim_value
+                    == png_data
+                )
+            else:
+                assert (
+                    metadata_init.a_type(
+                        value=io.BytesIO(png_data), size=48
+                    ).libzim_value
+                    == png_data
+                )
+    if metadata_init.a_type.expecting == "bytes":
+        if metadata_init.nb_args == 1:
             assert (
-                metadata_init.a_type("Foo", io.BytesIO(b"a value")).libzim_value
+                metadata_init.a_type(value=io.BytesIO(b"a value")).libzim_value
                 == b"a value"
             )
-    elif metadata_init.nb_args == 1:
-        if metadata_init.a_type == metadata.DateMetadata:
-            pass  # Not supported
-        elif metadata_init.a_type == metadata.LanguageMetadata:
-            assert (
-                metadata_init.a_type(io.BytesIO(b"fra,eng")).libzim_value == b"fra,eng"
-            )
         else:
             assert (
-                metadata_init.a_type(io.BytesIO(b"a value")).libzim_value == b"a value"
+                metadata_init.a_type(
+                    value=io.BytesIO(b"a value"), name="Foo"
+                ).libzim_value
+                == b"a value"
             )
+    else:
+        pytest.skip("Type doesnt support bytes-like input")
 
 
 def test_std_metadata_values():
@@ -424,12 +455,11 @@ def test_std_metadata_values():
         metadata.TitleMetadata("Test Title"),
         metadata.CreatorMetadata("Test Creator"),
         metadata.PublisherMetadata("Test Publisher"),
-        metadata.DateMetadata("2023-01-01"),
+        metadata.DateMetadata(datetime.date(2023, 1, 1)),
         metadata.DescriptionMetadata("Test Description"),
         metadata.LanguageMetadata("fra"),
         # blank 48x48 transparent PNG
-        metadata.IllustrationMetadata(
-            "Illustration_48x48@1",
+        metadata.DefaultIllustrationMetadata(
             base64.b64decode(
                 "iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
                 "ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
@@ -448,3 +478,7 @@ def test_std_metadata_values():
             pytest.fail(
                 f"Did not found matching expected values for {value.name} metadata"
             )
+
+
+def test_raw_metadata():
+    assert metadata.Metadata(name="Name", value=b"hello")
diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py
index 2fd7f3f2..d3a0a40c 100644
--- a/tests/zim/test_zim_creator.py
+++ b/tests/zim/test_zim_creator.py
@@ -3,6 +3,7 @@
 
 from __future__ import annotations
 
+import datetime
 import io
 import logging
 import pathlib
@@ -21,6 +22,7 @@
 from zimscraperlib.constants import UTF8
 from zimscraperlib.download import save_large_file, stream_file
 from zimscraperlib.filesystem import delete_callback
+from zimscraperlib.typing import Callback
 from zimscraperlib.zim import Archive, Creator, StaticItem, URLItem
 from zimscraperlib.zim.metadata import (
     DEFAULT_DEV_ZIM_METADATA,
@@ -28,9 +30,9 @@
     CustomMetadata,
     CustomTextMetadata,
     DateMetadata,
+    DefaultIllustrationMetadata,
     DescriptionMetadata,
     FlavourMetadata,
-    IllustrationMetadata,
     LanguageMetadata,
     LicenseMetadata,
     LongDescriptionMetadata,
@@ -76,7 +78,7 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn:
     with open(png_image, "rb") as fh:
         png_data = fh.read()
     with Creator(fpath, main_path).config_dev_metadata(
-        [TagsMetadata(tags), IllustrationMetadata("Illustration_48x48@1", png_data)]
+        [TagsMetadata(tags), DefaultIllustrationMetadata(png_data)]
     ) as creator:
         # verbatim HTML from string
         creator.add_item_for("welcome", "wel", content=html_str, is_front=True)
@@ -108,10 +110,12 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn:
     assert fpath.exists()
 
     reader = Archive(fpath)
-    assert reader.get_text_metadata("Title") == DEFAULT_DEV_ZIM_METADATA.Title.value
-    assert (
-        reader.get_text_metadata("Language") == DEFAULT_DEV_ZIM_METADATA.Language.value
-    )
+    assert reader.get_text_metadata(
+        "Title"
+    ) == DEFAULT_DEV_ZIM_METADATA.Title.libzim_value.decode("UTF-8")
+    assert reader.get_text_metadata(
+        "Language"
+    ) == DEFAULT_DEV_ZIM_METADATA.Language.libzim_value.decode("UTF-8")
     assert reader.get_text_metadata("Tags") == tags
     assert reader.main_entry.get_item().path == f"{main_path}"
     # make sure we have our image
@@ -143,11 +147,10 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn:
 def test_create_without_workaround(tmp_path):
     fpath = tmp_path / "test.zim"
 
-    with Creator(
-        fpath, "welcome", workaround_nocancel=False
-    ).config_dev_metadata() as creator:
-        with pytest.raises(RuntimeError, match="AttributeError"):
-            creator.add_item("hello")  # pyright: ignore [reportArgumentType]
+    with Creator(fpath, "welcome", workaround_nocancel=False).config_dev_metadata():
+        with pytest.raises(RuntimeError):
+            raise RuntimeError("erroring")
+    assert fpath.exists()
 
 
 def test_noindexlanguage(tmp_path):
@@ -207,7 +210,7 @@ def test_add_item_for_delete_fail(tmp_path, png_image):
                 filepath=local_path,
                 path="index",
             ),
-            callback=(delete_callback, local_path),
+            callbacks=Callback(func=delete_callback, kwargs={"fpath": local_path}),
         )
     assert not local_path.exists()
 
@@ -226,21 +229,6 @@ def test_add_item_empty_content(tmp_path):
         )
 
 
-@pytest.mark.parametrize("auto_index", [False, True])
-def test_add_item_for_unsupported_content_type(auto_index, tmp_path):
-    fpath = tmp_path / "test.zim"
-    # test with incorrect content type
-    with Creator(fpath, "welcome").config_dev_metadata() as creator:
-        with pytest.raises(RuntimeError):
-            creator.add_item_for(
-                path="welcome",
-                title="hello",
-                mimetype="text/plain",
-                content=123,  # pyright: ignore[reportArgumentType]
-                auto_index=auto_index,
-            )
-
-
 def test_compression(tmp_path):
     fpath = tmp_path / "test.zim"
     with Creator(
@@ -249,7 +237,7 @@ def test_compression(tmp_path):
         creator.add_item(StaticItem(path="welcome", content="hello"))
 
     with Creator(
-        fpath, "welcome", compression=Compression.zstd  # pyright: ignore
+        fpath, "welcome", compression=Compression.zstd.name
     ).config_dev_metadata() as creator:
         creator.add_item(StaticItem(path="welcome", content="hello"))
 
@@ -297,7 +285,7 @@ def test_sourcefile_removal_std(tmp_path, html_file):
                     path=paths[-1].name,
                     mimetype="text/html",
                 ),
-                callback=(delete_callback, paths[-1]),
+                callbacks=Callback(func=delete_callback, kwargs={"fpath": paths[-1]}),
             )
     for path in paths:
         assert not path.exists()
@@ -485,7 +473,8 @@ def cb():
 
     with Creator(fpath, "").config_dev_metadata() as creator:
         creator.add_item(
-            StaticItem(path=html_file.name, filepath=html_file), callback=cb
+            StaticItem(path=html_file.name, filepath=html_file),
+            callbacks=Callback(func=cb),
         )
 
     assert Store.called is True
@@ -514,13 +503,16 @@ def cb(*args):  # noqa: ARG001
 
     with Creator(tmp_path / "test.zim", "").config_dev_metadata() as creator:
         creator.add_item_for(
-            path=html_file.name, fpath=html_file, delete_fpath=True, callback=cb
+            path=html_file.name,
+            fpath=html_file,
+            delete_fpath=True,
+            callbacks=Callback(func=cb),
         )
         creator.add_item_for(
             path=html_file2.name,
             fpath=html_file2,
             delete_fpath=True,
-            callback=(cb, html_file.name),
+            callbacks=Callback(func=cb, args=(html_file.name,)),
         )
 
     assert not html_file.exists()
@@ -557,10 +549,6 @@ def test_without_metadata(tmp_path):
 @pytest.mark.parametrize(
     "tags",
     [
-        (
-            "wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:yes;"
-            "_ftindex:yes"
-        ),
         (
             [
                 "wikipedia",
@@ -569,23 +557,31 @@ def test_without_metadata(tmp_path):
                 "_videos:no",
                 "_details:yes",
                 "_ftindex:yes",
+                "wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:yes;"
+                "_ftindex:yes",
             ]
         ),
     ],
 )
 @patch("zimscraperlib.zim.creator.logger", autospec=True)
-def test_start_logs_metadata_log_contents(mocked_logger, png_image, tags, tmp_path):
+def test_start_logs_metadata_log_contents(
+    mocked_logger,
+    png_image,
+    tags,
+    tmp_path,
+    ignore_metadata_conventions,  # noqa: ARG001
+):
     mocked_logger.isEnabledFor.side_effect = lambda level: level == logging.DEBUG
     fpath = tmp_path / "test_config.zim"
     with open(png_image, "rb") as fh:
         png_data = fh.read()
-    creator = Creator(fpath, "", check_metadata_conventions=False).config_metadata(
+    creator = Creator(fpath, "").config_metadata(
         StandardMetadataList(
             Name=NameMetadata("wikipedia_fr_football"),
             Title=TitleMetadata("English Wikipedia"),
             Creator=CreatorMetadata("English speaking Wikipedia contributors"),
             Publisher=PublisherMetadata("Wikipedia user Foobar"),
-            Date=DateMetadata("2009-11-21"),
+            Date=DateMetadata(datetime.date(2009, 11, 21)),
             Description=DescriptionMetadata(
                 "All articles (without images) from the english Wikipedia"
             ),
@@ -599,9 +595,7 @@ def test_start_logs_metadata_log_contents(mocked_logger, png_image, tags, tmp_pa
             Flavour=FlavourMetadata("nopic"),
             Source=SourceMetadata("https://en.wikipedia.org/"),
             Scraper=ScraperMetadata("mwoffliner 1.2.3"),
-            Illustration_48x48_at_1=IllustrationMetadata(
-                "Illustration_48x48@1", png_data
-            ),
+            Illustration_48x48_at_1=DefaultIllustrationMetadata(png_data),
         ),
         [CustomTextMetadata("TestMetadata", "Test Metadata")],
         fail_on_missing_prefix_in_extras=False,
@@ -614,14 +608,16 @@ def __str__(self):
     creator._metadata.update(
         {
             "Illustration_96x96@1": Metadata(
-                "Illustration_96x96@1", b"%PDF-1.5\n%\xe2\xe3\xcf\xd3"
+                value=b"%PDF-1.5\n%\xe2\xe3\xcf\xd3", name="Illustration_96x96@1"
             ),
-            "Chars": Metadata("Chars", b"\xc5\xa1\xc9\x94\xc9\x9b"),
+            "Chars": Metadata(name="Chars", value=b"\xc5\xa1\xc9\x94\xc9\x9b"),
             "Chars-32": Metadata(
-                "Chars-32", b"\xff\xfe\x00\x00a\x01\x00\x00T\x02\x00\x00[\x02\x00\x00"
+                name="Chars-32",
+                value=b"\xff\xfe\x00\x00a\x01\x00\x00T\x02\x00\x00[\x02\x00\x00",
             ),
             "Video": Metadata(
-                "Video", b"\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00"
+                name="Video",
+                value=b"\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00",
             ),
             "Toupie": CustomTextMetadata("Toupie", NotPrintable("value")),
         }  # intentionaly bad, to handle case where user does bad things
@@ -649,7 +645,7 @@ def __str__(self):
             call("Metadata: Flavour = nopic"),
             call("Metadata: Illustration_48x48@1 is a 3274 bytes 48x48px PNG Image"),
             call("Metadata: Illustration_96x96@1 is a 14 bytes application/pdf blob"),
-            call("Metadata: Language = eng"),
+            call("Metadata: Language = ['eng']"),
             call("Metadata: License = CC-BY"),
             call(
                 "Metadata: LongDescription = This ZIM file contains all articles "
@@ -664,10 +660,8 @@ def __str__(self):
             call(f"Metadata: Tags = {tags}"),
             call("Metadata: TestMetadata = Test Metadata"),
             call("Metadata: Title = English Wikipedia"),
-            call(
-                "Metadata: Toupie is unexpected data type: "
-                "test_start_logs_metadata_log_contents.<locals>.NotPrintable"
-            ),
+            # cleaned-up anyway
+            call("Metadata: Toupie = value"),
             call("Metadata: Video is a 33 bytes video/mp4 blob"),
         ]
     )
@@ -677,9 +671,7 @@ def test_relax_metadata(
     tmp_path,
     ignore_metadata_conventions,  # noqa: ARG001
 ):
-    Creator(tmp_path, "", check_metadata_conventions=False).config_dev_metadata(
-        DescriptionMetadata("T" * 90)
-    ).start()
+    Creator(tmp_path, "").config_dev_metadata(DescriptionMetadata("T" * 90)).start()
 
 
 @pytest.mark.parametrize(
@@ -711,7 +703,7 @@ def test_config_metadata(tmp_path, png_image, tags):
             Title=TitleMetadata("English Wikipedia"),
             Creator=CreatorMetadata("English speaking Wikipedia contributors"),
             Publisher=PublisherMetadata("Wikipedia user Foobar"),
-            Date=DateMetadata("2009-11-21"),
+            Date=DateMetadata(datetime.date(2009, 11, 21)),
             Description=DescriptionMetadata(
                 "All articles (without images) from the english Wikipedia"
             ),
@@ -725,9 +717,7 @@ def test_config_metadata(tmp_path, png_image, tags):
             Flavour=FlavourMetadata("nopic"),
             Source=SourceMetadata("https://en.wikipedia.org/"),
             Scraper=ScraperMetadata("mwoffliner 1.2.3"),
-            Illustration_48x48_at_1=IllustrationMetadata(
-                "Illustration_48x48@1", png_data
-            ),
+            Illustration_48x48_at_1=DefaultIllustrationMetadata(png_data),
         ),
         [CustomTextMetadata("X-TestMetadata", "Test Metadata")],
     )

From c95a58f7375095e3019435c08adcc8574fe379bf Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 10:07:21 +0000
Subject: [PATCH 07/13] Upgrading dev dependencies

Reasoning: coverage reported a lot of missing lines on zim/metadata.py with previous version

Also includes auto linting where new ruff complained
---
 pyproject.toml                        | 12 ++++++------
 src/zimscraperlib/image/conversion.py |  2 +-
 src/zimscraperlib/zim/__init__.py     | 10 +++++-----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a5776507..13e05bca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,19 +53,19 @@ scripts = [
 ]
 lint = [
   "black==24.10.0",
-  "ruff==0.7.0",
+  "ruff==0.8.2",
 ]
 check = [
-  "pyright==1.1.385",
-  "pytest==8.3.3",
+  "pyright==1.1.390",
+  "pytest==8.3.4",
 ]
 test = [
-  "pytest==8.3.3",
+  "pytest==8.3.4",
   "pytest-mock==3.14.0",
-  "coverage==7.5.3",
+  "coverage==7.6.9",
 ]
 dev = [
-  "ipython==8.25.0",
+  "ipython==8.30.0",
   "pre-commit==4.0.1",
   "zimscraperlib[scripts]",
   "zimscraperlib[lint]",
diff --git a/src/zimscraperlib/image/conversion.py b/src/zimscraperlib/image/conversion.py
index ed4522d0..de53bea9 100644
--- a/src/zimscraperlib/image/conversion.py
+++ b/src/zimscraperlib/image/conversion.py
@@ -37,7 +37,7 @@ def convert_image(
     if not fmt:
         raise ValueError("Impossible to guess destination image format")
     with pilopen(src) as image:
-        if image.mode == "RGBA" and fmt in ALPHA_NOT_SUPPORTED or colorspace:
+        if (image.mode == "RGBA" and fmt in ALPHA_NOT_SUPPORTED) or colorspace:
             image = image.convert(colorspace or "RGB")  # noqa: PLW2901
         save_image(image, dst, fmt, **params)
 
diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py
index 6b0b1714..da74ab69 100644
--- a/src/zimscraperlib/zim/__init__.py
+++ b/src/zimscraperlib/zim/__init__.py
@@ -27,14 +27,14 @@
 
 __all__ = [
     "Archive",
+    "Blob",
     "Creator",
-    "make_zim_file",
+    "FileLikeProvider",
+    "FileProvider",
     "Item",
     "StaticItem",
-    "URLItem",
-    "FileProvider",
     "StringProvider",
-    "FileLikeProvider",
+    "URLItem",
     "URLProvider",
-    "Blob",
+    "make_zim_file",
 ]

From fca42e432ef84e1e0190b01ba4273b06a1465b38 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Thu, 12 Dec 2024 15:37:39 +0000
Subject: [PATCH 08/13] Using per-type base classes for Metadata

In order to properly expose input type in __init__ (for pyright and user assit),
use one base class (subclassing Metadata) per input type.

Cant get rid of the `Any` on `Metadata` init (otherwise would me re-implement the init everywhere).

Used the opportunity to remove the `expecting` classvar and modified tests accordingly

- Also fixed a minor issue in bytes reading by seeking back to previous position and not zero.
- Also shared binary reading logic inside main base class (was already there) so it can be reused in illustration
- Now explicitly says the type of stored data (can be different to inputs in somewhat flexible ones)
---
 src/zimscraperlib/zim/metadata.py | 317 ++++++++++++++----------------
 tests/zim/test_metadata.py        |  54 +++--
 2 files changed, 188 insertions(+), 183 deletions(-)

diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
index 6fc5c439..63f40bd6 100644
--- a/src/zimscraperlib/zim/metadata.py
+++ b/src/zimscraperlib/zim/metadata.py
@@ -44,132 +44,6 @@ def mandatory(cls):
     return cls
 
 
-def expecting_text(cls):
-    """Expects a Text (str) input. Will be cleaned-up and UTF-8 encoded"""
-    cls.expecting = "text"
-    cls.require_text_cleanup = True
-    cls.require_utf8_encoding = True
-
-    # native type is str
-    def get_cleaned_value(self, value: str) -> str:
-        if self.require_text_cleanup:
-            value = clean_str(value)
-
-        if not self.empty_allowed and not value.strip():
-            raise ValueError("Missing value (empty not allowed)")
-
-        # max-length is openZIM recommendation
-        if getattr(self, "oz_max_length", 0) and APPLY_RECOMMENDATIONS:
-            if nb_grapheme_for(value) > self.oz_max_length:
-                raise ValueError(
-                    f"{self.name} value is too long ({self.oz_max_length})"
-                )
-        return value
-
-    def get_libzim_value(self) -> bytes:
-        return self.get_encoded(self.value)
-
-    cls.get_cleaned_value = get_cleaned_value
-    cls.get_libzim_value = get_libzim_value
-    return cls
-
-
-def expecting_textlist(cls):
-    """Expects a Text List (list[str]) input. Each item will be cleaned-up.
-
-    List will be joined (see `join_list_with`) and UTF-8 encoded"""
-    cls.expecting = "textlist"
-    cls.require_textlist_cleanup = True
-    cls.require_utf8_encoding = True
-
-    # native type is list[str]
-    def get_cleaned_value(self, value: Iterable[str] | str) -> list[str]:
-        if isinstance(value, str):
-            value = [value]
-        else:
-            value = list(value)
-        if self.require_textlist_cleanup:
-            value = [clean_str(item) for item in value]
-        if not self.empty_allowed and (not value or not all(value)):
-            raise ValueError("Missing value (empty not allowed)")
-        if not self.duplicates_allowed and len(set(value)) != len(value):
-            raise ValueError("Duplicate entries not allowed")
-        elif self.require_deduplication:
-            value = unique_values(value)
-        if self.oz_only_iso636_3_allowed and APPLY_RECOMMENDATIONS:
-            invalid_codes = list(filterfalse(is_valid_iso_639_3, value))
-            if invalid_codes:
-                raise ValueError(
-                    f"Following code(s) are not ISO-639-3: {','.join(invalid_codes)}"
-                )
-        return value
-
-    def get_libzim_value(self) -> bytes:
-        return self.get_encoded(self.join_list_with.join(self.value))
-
-    cls.get_cleaned_value = get_cleaned_value
-    cls.get_libzim_value = get_libzim_value
-    return cls
-
-
-def expecting_date(cls):
-    """Expects a Date (date | datetime) input. Will be UTF-8 encoded as YYYY-MM-DD"""
-    cls.expecting = "date"
-    cls.require_utf8_encoding = True
-
-    # native type is date
-    def get_cleaned_value(
-        self, value: datetime.date | datetime.datetime  # noqa: ARG001
-    ) -> datetime.date:
-        if isinstance(value, datetime.datetime):
-            value = value.date()
-        return value
-
-    def get_libzim_value(self) -> bytes:
-        return self.get_encoded(self.value.strftime("%Y-%m-%d"))
-
-    cls.get_cleaned_value = get_cleaned_value
-    cls.get_libzim_value = get_libzim_value
-    return cls
-
-
-def expecting_illustration(cls):
-    """Expects a Square PNG Illustration (bytes-like) input.
-
-    PNG format and squareness will be checked"""
-    cls.expecting = "illustration"
-    cls.meta_mimetype = "image/png"
-
-    # native type is PNG image buffer
-    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
-        if isinstance(value, BinaryIO):
-            payload = value.read()
-            value.seek(0)
-            value = payload
-        elif isinstance(value, io.BytesIO):
-            value = value.getvalue()
-        if not self.empty_allowed and not value:
-            raise ValueError("Missing value (empty not allowed)")
-
-        if not is_valid_image(
-            image=value,
-            imformat="PNG",
-            size=(self.illustration_size, self.illustration_size),
-        ):
-            raise ValueError(
-                f"{self.name} is not a valid "
-                f"{self.illustration_size}x{self.illustration_size} PNG Image"
-            )
-        return value
-
-    def get_libzim_value(self) -> bytes:
-        return self.value
-
-    cls.get_cleaned_value = get_cleaned_value
-    cls.get_libzim_value = get_libzim_value
-    return cls
-
-
 def allow_empty(cls):
     """Whether input can be blank"""
     cls.empty_allowed = True
@@ -213,7 +87,7 @@ def x_prefixed(cls):
 class Metadata:
     # name of the metadata (not its value)
     meta_name: str
-    value: Any
+    value: bytes
 
     # MIME type of the value
     meta_mimetype: str
@@ -221,9 +95,6 @@ class Metadata:
     # whether metadata is required or not
     is_required: bool = False
 
-    # what kind of input data is expected
-    expecting: str = "bytes"  # text | textlist | date | illustration
-
     # str: text value must be cleaned-up
     require_text_cleanup: bool = False
 
@@ -287,7 +158,9 @@ def matches_reserved_name(name: str) -> bool:
         return bool(ILLUSTRATIONS_METADATA_RE.match(name))
 
     def get_name(self) -> str:
-        if self.expecting == "illustration":
+        if getattr(self, "illustration_size", 0) and getattr(
+            self, "illustration_scale", 0
+        ):
             return (
                 f"Illustration_{self.illustration_size}x{self.illustration_size}"
                 f"@{self.illustration_scale}"
@@ -321,11 +194,11 @@ def get_encoded(value: str) -> bytes:
     def libzim_value(self) -> bytes:
         return self.get_libzim_value()
 
-    # native type is bytes
-    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+    def get_binary_from(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
         if isinstance(value, BinaryIO):
+            last_pos = value.tell()
             payload = value.read()
-            value.seek(0)
+            value.seek(last_pos)
             value = payload
         elif isinstance(value, io.BytesIO):
             value = value.getvalue()
@@ -333,6 +206,10 @@ def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
             raise ValueError("Missing value (empty not allowed)")
         return value
 
+    # native type is bytes
+    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+        return self.get_binary_from(value)
+
     def get_libzim_value(self) -> bytes:
         return self.value
 
@@ -341,48 +218,160 @@ def validate(self) -> None:
         _ = self.libzim_value
 
 
+class TextBasedMetadata(Metadata):
+    """Expects a Text (str) input. Will be cleaned-up and UTF-8 encoded"""
+
+    value: str
+    require_text_cleanup = True
+    require_utf8_encoding = True
+
+    def __init__(self, value: str, name: str | None = None) -> None:
+        super().__init__(value=value, name=name)
+
+    # native type is str
+    def get_cleaned_value(self, value: str) -> str:
+        if self.require_text_cleanup:
+            value = clean_str(value)
+
+        if not self.empty_allowed and not value.strip():
+            raise ValueError("Missing value (empty not allowed)")
+
+        # max-length is openZIM recommendation
+        if getattr(self, "oz_max_length", 0) and APPLY_RECOMMENDATIONS:
+            if nb_grapheme_for(value) > self.oz_max_length:
+                raise ValueError(
+                    f"{self.name} value is too long ({self.oz_max_length})"
+                )
+        return value
+
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.value)
+
+
+class TextListBasedMetadata(Metadata):
+    """Expects a Text List (list[str]) input. Each item will be cleaned-up.
+
+    List will be joined (see `join_list_with`) and UTF-8 encoded"""
+
+    value: list[str]  # we accept single str input but store as list[str]
+    require_textlist_cleanup = True
+    require_utf8_encoding = True
+
+    def __init__(self, value: Iterable[str] | str, name: str | None = None) -> None:
+        super().__init__(value=value, name=name)
+
+    # native type is list[str]
+    def get_cleaned_value(self, value: Iterable[str] | str) -> list[str]:
+        if isinstance(value, str):
+            value = [value]
+        else:
+            value = list(value)
+        if self.require_textlist_cleanup:
+            value = [clean_str(item) for item in value]
+        if not self.empty_allowed and (not value or not all(value)):
+            raise ValueError("Missing value (empty not allowed)")
+        if not self.duplicates_allowed and len(set(value)) != len(value):
+            raise ValueError("Duplicate entries not allowed")
+        elif self.require_deduplication:
+            value = unique_values(value)
+        if self.oz_only_iso636_3_allowed and APPLY_RECOMMENDATIONS:
+            invalid_codes = list(filterfalse(is_valid_iso_639_3, value))
+            if invalid_codes:
+                raise ValueError(
+                    f"Following code(s) are not ISO-639-3: {','.join(invalid_codes)}"
+                )
+        return value
+
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.join_list_with.join(self.value))
+
+
+class DateBasedMetadata(Metadata):
+    """Expects a Date (date | datetime) input. Will be UTF-8 encoded as YYYY-MM-DD"""
+
+    value: datetime.date
+    require_utf8_encoding = True
+
+    def __init__(
+        self, value: datetime.date | datetime.datetime, name: str | None = None
+    ) -> None:
+        super().__init__(value=value, name=name)
+
+    # native type is date
+    def get_cleaned_value(self, value: datetime.date) -> datetime.date:
+        if isinstance(value, datetime.datetime):
+            value = value.date()
+        return value
+
+    def get_libzim_value(self) -> bytes:
+        return self.get_encoded(self.value.strftime("%Y-%m-%d"))
+
+
+class IllustrationBasedMetadata(Metadata):
+    """Expects a Square PNG Illustration (bytes-like) input.
+
+    PNG format and squareness will be checked"""
+
+    value: bytes
+    meta_mimetype = "image/png"
+
+    def __init__(
+        self, value: bytes | BinaryIO | io.BytesIO, name: str | None = None
+    ) -> None:
+        super().__init__(value=value, name=name)
+
+    # native type is PNG image buffer
+    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+        value = self.get_binary_from(value)
+        if not is_valid_image(
+            image=value,
+            imformat="PNG",
+            size=(self.illustration_size, self.illustration_size),
+        ):
+            raise ValueError(
+                f"{self.name} is not a valid "
+                f"{self.illustration_size}x{self.illustration_size} PNG Image"
+            )
+        return value
+
+    def get_libzim_value(self) -> bytes:
+        return self.value
+
+
 @mandatory
-@expecting_text
-class NameMetadata(Metadata):
+class NameMetadata(TextBasedMetadata):
     meta_name: str = "Name"
 
 
 @mandatory
-@expecting_textlist
 @only_lang_codes
-class LanguageMetadata(Metadata):
+class LanguageMetadata(TextListBasedMetadata):
     meta_name: str = "Language"
     join_list_with: str = ","
 
 
 @mandatory
-@expecting_text
-class TitleMetadata(Metadata):
+class TitleMetadata(TextBasedMetadata):
     meta_name: str = "Title"
     oz_max_length: int = RECOMMENDED_MAX_TITLE_LENGTH
 
 
 @mandatory
-@expecting_text
-class CreatorMetadata(Metadata):
+class CreatorMetadata(TextBasedMetadata):
     meta_name: str = "Creator"
 
 
 @mandatory
-@expecting_text
-class PublisherMetadata(Metadata):
+class PublisherMetadata(TextBasedMetadata):
     meta_name: str = "Publisher"
 
 
-# TODO
 @mandatory
-@expecting_date
-class DateMetadata(Metadata):
+class DateMetadata(DateBasedMetadata):
     meta_name: str = "Date"
 
 
-@expecting_illustration
-class IllustrationMetadata(Metadata):
+class IllustrationMetadata(IllustrationBasedMetadata):
     meta_name = "Illustration_{size}x{size}@{scale}"
     illustration_size: int
     illustration_scale: int = 1
@@ -396,55 +385,46 @@ def __init__(
 
 
 @mandatory
-@expecting_illustration
-class DefaultIllustrationMetadata(Metadata):
+class DefaultIllustrationMetadata(IllustrationBasedMetadata):
     meta_name = "Illustration_48x48@1"
     illustration_size: int = 48
     illustration_scale: int = 1
 
 
 @mandatory
-@expecting_text
-class DescriptionMetadata(Metadata):
+class DescriptionMetadata(TextBasedMetadata):
     meta_name: str = "Description"
     oz_max_length: int = MAXIMUM_DESCRIPTION_METADATA_LENGTH
 
 
-@expecting_text
-class LongDescriptionMetadata(Metadata):
+class LongDescriptionMetadata(TextBasedMetadata):
     meta_name: str = "LongDescription"
     oz_max_length: int = MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
 
 
 @deduplicate
-@expecting_textlist
-class TagsMetadata(Metadata):
+class TagsMetadata(TextListBasedMetadata):
     meta_name: str = "Tags"
     join_list_with: str = ";"
 
 
-@expecting_text
-class ScraperMetadata(Metadata):
+class ScraperMetadata(TextBasedMetadata):
     meta_name: str = "Scraper"
 
 
-@expecting_text
-class FlavourMetadata(Metadata):
+class FlavourMetadata(TextBasedMetadata):
     meta_name: str = "Flavour"
 
 
-@expecting_text
-class SourceMetadata(Metadata):
+class SourceMetadata(TextBasedMetadata):
     meta_name: str = "Source"
 
 
-@expecting_text
-class LicenseMetadata(Metadata):
+class LicenseMetadata(TextBasedMetadata):
     meta_name: str = "License"
 
 
-@expecting_text
-class RelationMetadata(Metadata):
+class RelationMetadata(TextBasedMetadata):
     meta_name: str = "Relation"
 
 
@@ -491,8 +471,7 @@ def __init__(self, name: str, value: bytes | BinaryIO | io.BytesIO) -> None:
 
 
 @x_protected
-@expecting_text
-class CustomTextMetadata(Metadata):
+class CustomTextMetadata(TextBasedMetadata):
     def __init__(self, name: str, value: str) -> None:
         self.meta_name = name
         super().__init__(name=name, value=value)
diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py
index d202e303..8bd3560f 100644
--- a/tests/zim/test_metadata.py
+++ b/tests/zim/test_metadata.py
@@ -6,7 +6,8 @@
 import io
 import pathlib
 import re
-from typing import NamedTuple
+from types import NoneType
+from typing import BinaryIO, NamedTuple
 
 import pytest
 from beartype.roar import BeartypeCallHintParamViolation as InvalidType
@@ -14,6 +15,18 @@
 from zimscraperlib.zim import metadata
 
 
+def get_classvar_value_type(cls: type) -> type:
+    """expected type of value classvar for a Metadata type
+
+    Found in class annotations of type or any of its parent class"""
+    target = cls
+    while target:
+        if value_type := getattr(target, "__annotations__", {}).get("value", False):
+            return value_type
+        target = getattr(target, "__base__", None)
+    return NoneType
+
+
 @pytest.mark.parametrize(
     "value",
     [
@@ -105,7 +118,7 @@ def test_validate_tags_invalid(
     value: list[str] | str | int, exception: type, error: str
 ):
     with pytest.raises(exception, match=re.escape(error)):
-        metadata.TagsMetadata(value)
+        metadata.TagsMetadata(value)  # pyright: ignore [reportArgumentType]
 
 
 def test_validate_dedup_tags():
@@ -322,21 +335,30 @@ def test_mandatory_value(metadata_init: MetadataInitConfig):
     if not metadata_init.a_type.is_required:
         pytest.skip("Only testing mandatory ones")
     with pytest.raises(ValueError, match="Missing"):
-        if metadata_init.a_type.expecting == "textlist":
+        if issubclass(metadata_init.a_type, metadata.TextListBasedMetadata):
             metadata_init.a_type([])
             metadata_init.a_type(["", " "])
-        elif metadata_init.a_type.expecting == "text":
+        elif issubclass(metadata_init.a_type, metadata.TextBasedMetadata):
             metadata_init.a_type("")
             metadata_init.a_type(" ")
-        elif metadata_init.a_type.expecting == "date":
+        elif issubclass(metadata_init.a_type, metadata.DateBasedMetadata):
             pytest.skip("Cannot set an empty Date")
-        elif metadata_init.a_type.expecting in ("illustration", "bytes"):
+        elif issubclass(metadata_init.a_type, metadata.DefaultIllustrationMetadata):
+            metadata_init.a_type(b"")
+            metadata_init.a_type(b" ")
+        elif get_classvar_value_type(metadata_init.a_type) is bytes:
             if metadata_init.nb_args == 1:
-                metadata_init.a_type(b"")
-                metadata_init.a_type(b" ")
+                metadata_init.a_type(b"")  # pyright: ignore[reportCallIssue]
+                metadata_init.a_type(b" ")  # pyright: ignore[reportCallIssue]
             else:
-                metadata_init.a_type(b"", name="Foo")
-                metadata_init.a_type(b" ", name="Foo")
+                metadata_init.a_type(
+                    b"", name="Foo"  # pyright: ignore[reportCallIssue]
+                )
+                metadata_init.a_type(
+                    b" ", name="Foo"  # pyright: ignore[reportCallIssue]
+                )
+        else:
+            raise OSError("WTF")
 
 
 def test_clean_value(metadata_init: MetadataInitConfig):
@@ -346,7 +368,7 @@ def test_clean_value(metadata_init: MetadataInitConfig):
     clean_lang_value = b"fra"
     if metadata_init.a_type == metadata.LanguageMetadata:
         assert metadata_init.a_type([raw_lang_value]).libzim_value == clean_lang_value
-    elif metadata_init.a_type.expecting == "textlist":
+    elif isinstance(metadata_init.a_type, metadata.TextListBasedMetadata):
         assert metadata_init.a_type(raw_value).libzim_value == clean_value
         assert metadata_init.a_type(
             value=[raw_value, raw_value]
@@ -355,7 +377,7 @@ def test_clean_value(metadata_init: MetadataInitConfig):
         ).encode(
             "UTF-8"
         )
-    elif metadata_init.a_type.expecting == "text":
+    elif isinstance(metadata_init.a_type, metadata.TextBasedMetadata):
         if metadata_init.nb_args == 1:
             assert metadata_init.a_type(value=raw_value).libzim_value == clean_value
         else:
@@ -411,7 +433,7 @@ def test_libzim_bytes_value(metadata_init: MetadataInitConfig, png_image: pathli
 def test_libzim_io_bytesio_value(
     metadata_init: MetadataInitConfig, png_image: pathlib.Path
 ):
-    if metadata_init.a_type.expecting == "illustration":
+    if isinstance(metadata_init.a_type, metadata.IllustrationBasedMetadata):
         with open(png_image, "rb") as fh:
             png_data = fh.read()
             if metadata_init.nb_args == 1:
@@ -426,7 +448,11 @@ def test_libzim_io_bytesio_value(
                     ).libzim_value
                     == png_data
                 )
-    if metadata_init.a_type.expecting == "bytes":
+    if get_classvar_value_type(metadata_init.a_type) in (
+        bytes,
+        BinaryIO,
+        io.BytesIO,
+    ):
         if metadata_init.nb_args == 1:
             assert (
                 metadata_init.a_type(value=io.BytesIO(b"a value")).libzim_value

From 68844ab3467dfeb5dc3ff7a1087842bc3e44c343 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Fri, 13 Dec 2024 12:09:07 +0000
Subject: [PATCH 09/13] Removed unreachable code in metadata log

---
 src/zimscraperlib/zim/creator.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
index 318abeb9..af81f298 100644
--- a/src/zimscraperlib/zim/creator.py
+++ b/src/zimscraperlib/zim/creator.py
@@ -191,14 +191,7 @@ def _log_metadata(self):
                     )
                 continue
 
-            # rest is either printable or unexpected
-            try:
-                logger.debug(f"Metadata: {name} = {metadata.value!s}")
-            except Exception:
-                logger.debug(
-                    f"Metadata: {name} is unexpected data type: "
-                    f"{type(metadata.value).__qualname__}"
-                )
+            logger.debug(f"Metadata: {name} = {metadata.value!s}")
 
     def _get_first_language_metadata_value(self) -> str | None:
         """Private methods to get most popular lang code from Language metadata"""

From 6a2ea9ba655588a57122b5712e3f1236a77a1c37 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Fri, 13 Dec 2024 12:19:17 +0000
Subject: [PATCH 10/13] Use illustration API for all illustration based
 metadata

---
 src/zimscraperlib/zim/creator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
index af81f298..e18e9e85 100644
--- a/src/zimscraperlib/zim/creator.py
+++ b/src/zimscraperlib/zim/creator.py
@@ -43,7 +43,7 @@
 from zimscraperlib.zim.metadata import (
     DEFAULT_DEV_ZIM_METADATA,
     MANDATORY_ZIM_METADATA_KEYS,
-    IllustrationMetadata,
+    IllustrationBasedMetadata,
     LanguageMetadata,
     Metadata,
     StandardMetadataList,
@@ -229,7 +229,7 @@ def start(self):
         super().__enter__()
 
         for metadata in self._metadata.values():
-            if isinstance(metadata, IllustrationMetadata):
+            if isinstance(metadata, IllustrationBasedMetadata):
                 self.add_illustration(metadata.illustration_size, metadata.libzim_value)
             else:
                 self.add_metadata(metadata)

From 48ba93b4a1da52feb25b30ea1b8a62ea1fb98818 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Fri, 13 Dec 2024 12:20:11 +0000
Subject: [PATCH 11/13] Fixed IO/bytes based input

---
 src/zimscraperlib/zim/metadata.py | 35 ++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
index 63f40bd6..b9a8b69d 100644
--- a/src/zimscraperlib/zim/metadata.py
+++ b/src/zimscraperlib/zim/metadata.py
@@ -6,7 +6,7 @@
 from collections.abc import Iterable
 from dataclasses import asdict, dataclass, fields
 from itertools import filterfalse
-from typing import Any, BinaryIO
+from typing import Any
 
 import regex
 
@@ -194,20 +194,25 @@ def get_encoded(value: str) -> bytes:
     def libzim_value(self) -> bytes:
         return self.get_libzim_value()
 
-    def get_binary_from(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
-        if isinstance(value, BinaryIO):
-            last_pos = value.tell()
-            payload = value.read()
-            value.seek(last_pos)
-            value = payload
-        elif isinstance(value, io.BytesIO):
-            value = value.getvalue()
+    def get_binary_from(self, value: bytes | io.IOBase | io.BytesIO) -> bytes:
+        bvalue: bytes = b""
+        if isinstance(value, io.BytesIO):
+            bvalue = value.getvalue()
+        elif isinstance(value, bytes):
+            bvalue = value
+        else:
+            last_pos: int
+            if value.seekable():
+                last_pos = value.tell()
+            bvalue = value.read()
+            if value.seekable():
+                value.seek(last_pos)
         if not self.empty_allowed and not value:
             raise ValueError("Missing value (empty not allowed)")
-        return value
+        return bvalue
 
     # native type is bytes
-    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+    def get_cleaned_value(self, value: bytes | io.IOBase | io.BytesIO) -> bytes:
         return self.get_binary_from(value)
 
     def get_libzim_value(self) -> bytes:
@@ -316,12 +321,12 @@ class IllustrationBasedMetadata(Metadata):
     meta_mimetype = "image/png"
 
     def __init__(
-        self, value: bytes | BinaryIO | io.BytesIO, name: str | None = None
+        self, value: bytes | io.IOBase | io.BytesIO, name: str | None = None
     ) -> None:
         super().__init__(value=value, name=name)
 
     # native type is PNG image buffer
-    def get_cleaned_value(self, value: bytes | BinaryIO | io.BytesIO) -> bytes:
+    def get_cleaned_value(self, value: bytes | io.IOBase | io.BytesIO) -> bytes:
         value = self.get_binary_from(value)
         if not is_valid_image(
             image=value,
@@ -377,7 +382,7 @@ class IllustrationMetadata(IllustrationBasedMetadata):
     illustration_scale: int = 1
 
     def __init__(
-        self, value: bytes | BinaryIO | io.BytesIO, size: int, scale: int = 1
+        self, value: bytes | io.IOBase | io.BytesIO, size: int, scale: int = 1
     ) -> None:
         self.illustration_scale = scale
         self.illustration_size = size
@@ -465,7 +470,7 @@ def get_reserved_names(cls) -> list[str]:
 
 @x_protected
 class CustomMetadata(Metadata):
-    def __init__(self, name: str, value: bytes | BinaryIO | io.BytesIO) -> None:
+    def __init__(self, name: str, value: bytes | io.IOBase | io.BytesIO) -> None:
         self.meta_name = name
         super().__init__(value=value)
 

From e669d24252c3a433838e566b4a54a149e2dc4fc6 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Fri, 13 Dec 2024 12:22:19 +0000
Subject: [PATCH 12/13] Added useless init for CustomMetadata to please
 coverage

---
 src/zimscraperlib/zim/metadata.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py
index b9a8b69d..b7e8b012 100644
--- a/src/zimscraperlib/zim/metadata.py
+++ b/src/zimscraperlib/zim/metadata.py
@@ -483,14 +483,20 @@ def __init__(self, name: str, value: str) -> None:
 
 
 @x_prefixed
-class XCustomMetadata(CustomMetadata): ...
+class XCustomMetadata(CustomMetadata):
+    # reimpl just to please coverage
+    def __init__(self, name: str, value: bytes | io.IOBase | io.BytesIO) -> None:
+        super().__init__(name=name, value=value)
 
 
 @x_prefixed
-class XCustomTextMetadata(CustomTextMetadata): ...
+class XCustomTextMetadata(CustomTextMetadata):
+    # reimpl just to please coverage
+    def __init__(self, name: str, value: str) -> None:
+        super().__init__(name=name, value=value)
 
 
-MANDATORY_ZIM_METADATA_KEYS = StandardMetadataList.get_reserved_names()
+MANDATORY_ZIM_METADATA_KEYS: list[str] = StandardMetadataList.get_reserved_names()
 
 
 DEFAULT_DEV_ZIM_METADATA = StandardMetadataList(

From 7e2efa19141d8062a780fffe8db81abf774533e8 Mon Sep 17 00:00:00 2001
From: rgaudin <reg@rskg.org>
Date: Fri, 13 Dec 2024 12:23:33 +0000
Subject: [PATCH 13/13] updated tests for metadata/illus/typing updates

---
 tests/zim/test_metadata.py    | 99 +++++++++++++++++++++++++++++++++++
 tests/zim/test_typing.py      | 40 ++++++++++++++
 tests/zim/test_zim_creator.py | 53 +++++++++++++++++++
 3 files changed, 192 insertions(+)
 create mode 100644 tests/zim/test_typing.py

diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py
index 8bd3560f..434d5b11 100644
--- a/tests/zim/test_metadata.py
+++ b/tests/zim/test_metadata.py
@@ -508,3 +508,102 @@ def test_std_metadata_values():
 
 def test_raw_metadata():
     assert metadata.Metadata(name="Name", value=b"hello")
+
+
+def test_decorators():
+    @metadata.allow_empty
+    class MyMetadata(metadata.XCustomTextMetadata): ...
+
+    assert MyMetadata("Test", "value").libzim_value == b"value"
+
+    @metadata.allow_duplicates
+    class MyMetadata2(metadata.TextListBasedMetadata):
+        join_list_with = "|"
+
+    assert (
+        MyMetadata2(name="Test", value=["value", "hello", "value"]).libzim_value
+        == b"value|hello|value"
+    )
+
+    raw_value = "\t\n\r\n \tA val \t\awith  \bcontrol chars\v\n"
+
+    class MyMetadata3(metadata.TextBasedMetadata):
+        require_text_cleanup = False
+
+    assert MyMetadata3(name="Test", value=raw_value).libzim_value == raw_value.encode(
+        "UTF-8"
+    )
+
+    @metadata.allow_duplicates
+    class MyMetadata4(metadata.TextListBasedMetadata):
+        require_textlist_cleanup = False
+        join_list_with = "|"
+
+    assert MyMetadata4(
+        name="Test", value=(raw_value, raw_value)
+    ).libzim_value == "|".join([raw_value, raw_value]).encode("UTF-8")
+
+
+def test_custom_ones():
+    textval = "value"
+    value = textval.encode("UTF-8")
+    assert metadata.CustomMetadata("Test", value).libzim_value == value
+    assert metadata.XCustomMetadata("Name", value).libzim_value == value
+    assert metadata.XCustomMetadata("Name", value).name == "X-Name"
+    assert metadata.CustomTextMetadata("Test", textval).libzim_value == value
+    assert metadata.XCustomTextMetadata("Name", textval).libzim_value == value
+    assert metadata.XCustomTextMetadata("Name", textval).name == "X-Name"
+    with pytest.raises(ValueError, match="must be X- prefixed"):
+        metadata.CustomMetadata("Name", value)
+    with pytest.raises(ValueError, match="must be X- prefixed"):
+        metadata.CustomTextMetadata("Name", textval)
+
+
+def test_mandatory_zim_metadata_keys():
+    # as per the spec on 2024-12-13
+    assert len(metadata.MANDATORY_ZIM_METADATA_KEYS) >= 8
+    assert "Illustration_48x48@1" in metadata.MANDATORY_ZIM_METADATA_KEYS
+
+
+def test_default_dev_zim_metadata():
+    assert isinstance(metadata.DEFAULT_DEV_ZIM_METADATA, metadata.StandardMetadataList)
+    # as per the spec on 2024-12-13
+    assert len(metadata.DEFAULT_DEV_ZIM_METADATA.values()) == 8
+
+
+def test_get_binary_from(png_image):
+    with open(png_image, "rb") as fh:
+        png_data = fh.read()
+    # bytes input
+    assert metadata.Metadata(value=png_data, name="Test").libzim_value == png_data
+    # io.BytesIO input
+    assert (
+        metadata.Metadata(value=io.BytesIO(png_data), name="Test").libzim_value
+        == png_data
+    )
+    # BaseIO input
+    with open(png_image, "rb") as fh:
+        assert metadata.Metadata(value=fh, name="Test").libzim_value == png_data
+
+    # unseekbale BaseIO
+    def notseekable():
+        return False
+
+    with open(png_image, "rb") as fh:
+        fh.seekable = notseekable
+        assert metadata.Metadata(value=fh, name="Test").libzim_value == png_data
+
+
+def test_ensure_missingname_raises():
+    with pytest.raises(OSError, match="name missing"):
+        metadata.Metadata(b"yello")
+
+
+def test_mimetype_usage():
+    mimetype = "video/webm"
+    assert metadata.Metadata(b"hello", "Test", mimetype=mimetype).mimetype == mimetype
+    assert metadata.Metadata(b"hello", "Test").mimetype == "text/plain;charset=UTF-8"
+    assert (
+        metadata.DEFAULT_DEV_ZIM_METADATA.Illustration_48x48_at_1.mimetype
+        == "image/png"
+    )
diff --git a/tests/zim/test_typing.py b/tests/zim/test_typing.py
new file mode 100644
index 00000000..a262d8f2
--- /dev/null
+++ b/tests/zim/test_typing.py
@@ -0,0 +1,40 @@
+from zimscraperlib.typing import Callback
+
+
+class GlobalCounter:
+    count: int = 0
+
+
+def update_counter() -> int:
+    GlobalCounter.count += 1
+    return GlobalCounter.count
+
+
+def update_counter_args(bump: int) -> int:
+    GlobalCounter.count += bump
+    return GlobalCounter.count
+
+
+def update_counter_kwargs(*, bump: int) -> int:
+    GlobalCounter.count += bump
+    return GlobalCounter.count
+
+
+def update_counter_args_and_kwargs(bump: int, *, factor: int) -> int:
+    GlobalCounter.count += bump * factor
+    return GlobalCounter.count
+
+
+def test_callback_init():
+    assert Callback(update_counter)
+    assert Callback(update_counter).callable
+    GlobalCounter.count = 0
+    assert GlobalCounter.count == 0
+    Callback(update_counter_args, args=(1,)).call()
+    assert GlobalCounter.count == 1
+    Callback(update_counter_args, kwargs={"bump": 1}).call()
+    assert GlobalCounter.count == 2
+    Callback(update_counter_kwargs, kwargs={"bump": 1}).call()
+    assert GlobalCounter.count == 3
+    Callback(update_counter_args_and_kwargs, args=(1,), kwargs={"factor": 2}).call()
+    assert GlobalCounter.count == 5
diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py
index d3a0a40c..78c324ea 100644
--- a/tests/zim/test_zim_creator.py
+++ b/tests/zim/test_zim_creator.py
@@ -181,6 +181,14 @@ def test_add_item_for(tmp_path):
             creator.add_item_for(path="welcome", title="hello")
 
 
+def test_additem_bad_content(tmp_path):
+    with Creator(tmp_path / "test.zim", "welcome").config_dev_metadata() as creator:
+        with pytest.raises(RuntimeError, match="Unexpected type for content"):
+            si = StaticItem(path="welcome", content="hello")
+            si.content = 1  # pyright: ignore[reportAttributeAccessIssue]
+            creator.add_item(si)
+
+
 def test_add_item_for_delete(tmp_path, html_file):
     fpath = tmp_path / "test.zim"
     local_path = pathlib.Path(tmp_path / "somefile.html")
@@ -480,6 +488,51 @@ def cb():
     assert Store.called is True
 
 
+def test_item_callbacks(tmp_path):
+    fpath = tmp_path / "test.zim"
+
+    class Store:
+        called = 0
+
+    def cb():
+        Store.called += 1
+
+    with Creator(fpath, "").config_dev_metadata() as creator:
+        creator.add_item_for(
+            path="hello",
+            content="hello",
+            callbacks=[Callback(func=cb), Callback(func=cb)],
+        )
+
+    assert Store.called == 2
+
+    class UnCallableCallback(Callback):
+        @property
+        def callable(self) -> bool:
+            return False
+
+    with Creator(fpath, "").config_dev_metadata() as creator:
+        # +2
+        creator.add_item(
+            StaticItem(path="hello", content="hello"),
+            callbacks=[Callback(func=cb), Callback(func=cb)],
+        )
+        # + 0
+        creator.add_item(StaticItem(path="hello2", content="hello"))
+        # +1
+        creator.add_item(
+            StaticItem(path="hello3", content="hello"),
+            callbacks=Callback(func=cb),
+        )
+        # + 0
+        creator.add_item(
+            StaticItem(path="hello4", content="hello"),
+            callbacks=UnCallableCallback(func=cb),
+        )
+
+    assert Store.called == 5
+
+
 def test_compess_hints(tmp_path, html_file):
     with Creator(tmp_path / "test.zim", "").config_dev_metadata() as creator:
         creator.add_item_for(