From 0f1f16377732e598bb9edf2ab4a945396367c84f Mon Sep 17 00:00:00 2001
From: John Franey <1728528+johnfraney@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:10:16 -0400
Subject: [PATCH] refactor: move functions out of top init file
---
blurry/__init__.py | 228 ++----------------------
blurry/file_processors/__init__.py | 166 +++++++++++++++++
blurry/gather_file_data_by_directory.py | 60 +++++++
blurry/runserver_handlers.py | 37 ++++
blurry/utils.py | 16 --
tests/test_utils.py | 2 +-
6 files changed, 274 insertions(+), 235 deletions(-)
create mode 100644 blurry/file_processors/__init__.py
create mode 100644 blurry/gather_file_data_by_directory.py
create mode 100644 blurry/runserver_handlers.py
diff --git a/blurry/__init__.py b/blurry/__init__.py
index cccf180..23eed9d 100644
--- a/blurry/__init__.py
+++ b/blurry/__init__.py
@@ -1,19 +1,12 @@
import asyncio
import concurrent.futures
-import dataclasses
import importlib
-import json
-import mimetypes
import os
import pkgutil
-import shutil
import sys
-from copy import deepcopy
from datetime import datetime
from pathlib import Path
-from typing import Any
-from jinja2 import Environment
from livereload import Server
from rich import print
from rich.console import Console
@@ -28,30 +21,18 @@
from blurry.commands.clean import clean_build_directory
from blurry.commands.init import initialize_new_project
from blurry.constants import ENV_VAR_PREFIX
-from blurry.images import generate_images_for_srcset
-from blurry.markdown import convert_markdown_file_to_html
-from blurry.open_graph import open_graph_meta_tags
-from blurry.plugins import discovered_html_plugins
-from blurry.schema_validation import validate_front_matter_as_schema
+from blurry.file_processors import process_non_markdown_file
+from blurry.file_processors import write_html_file
+from blurry.gather_file_data_by_directory import gather_file_data_by_directory
+from blurry.runserver_handlers import handle_changed_jinja_files
+from blurry.runserver_handlers import handle_changed_markdown_files
+from blurry.runserver_handlers import rebuild_markdown_files
from blurry.settings import get_build_directory
from blurry.settings import get_content_directory
from blurry.settings import SETTINGS
from blurry.settings import update_settings
from blurry.sitemap import write_sitemap_file
from blurry.templates import get_jinja_env
-from blurry.types import DirectoryFileData
-from blurry.types import MarkdownFileData
-from blurry.types import TemplateContext
-from blurry.utils import content_path_to_url
-from blurry.utils import convert_content_path_to_directory_in_build
-from blurry.utils import format_schema_data
-from blurry.utils import sort_directory_file_data_by_date
-from blurry.utils import write_index_file_creating_path
-
-
-def json_converter_with_dates(item: Any) -> None | str:
- if isinstance(item, datetime):
- return item.strftime("%Y-%M-%D")
warning_console = Console(stderr=True, style="bold yellow")
@@ -60,176 +41,6 @@ def json_converter_with_dates(item: Any) -> None | str:
app = AsyncTyper()
-def process_non_markdown_file(
- filepath: Path, file_data_by_directory, jinja_env: Environment
-):
- # Process Jinja files
- if ".jinja" in filepath.suffixes:
- process_jinja_file(filepath, jinja_env, file_data_by_directory)
- return
-
- CONTENT_DIR = get_content_directory()
- mimetype, _ = mimetypes.guess_type(filepath, strict=False)
- relative_filepath = filepath.relative_to(CONTENT_DIR)
- output_file = get_build_directory() / relative_filepath
-
- # Copy file to build directory if it is not already there
- if not output_file.exists():
- output_file.parent.mkdir(exist_ok=True, parents=True)
- shutil.copyfile(filepath, output_file)
-
- # Create srcset images
- if mimetype in [
- mimetypes.types_map[".jpg"],
- mimetypes.types_map[".png"],
- ]:
- asyncio.run(generate_images_for_srcset(filepath))
-
-
-def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory):
- build_directory = get_build_directory()
- content_directory = get_content_directory()
- template = jinja_env.get_template(str(filepath.relative_to(content_directory)))
- context = {
- "file_data_by_directory": {
- str(path): data for path, data in deepcopy(file_data_by_directory).items()
- },
- "settings": deepcopy(SETTINGS),
- "datetime": datetime,
- }
- filepath_with_new_extension = filepath.with_suffix(
- filepath.suffix.replace(".jinja", "")
- )
- filepath_in_build = build_directory / filepath_with_new_extension.relative_to(
- content_directory
- )
- html = template.render(dataclasses=dataclasses, **context)
- filepath_in_build.write_text(html)
-
-
-def write_html_file(
- filepath: Path,
- file_data_by_directory: dict[Path, list[MarkdownFileData]],
- release: bool,
- jinja_env: Environment,
-):
- extra_context: TemplateContext = {}
- # Gather data from other files in this directory if this is an index file
- file_data_list = file_data_by_directory[filepath.parent]
- if filepath.name == "index.md":
- sibling_pages = [
- {
- "url": content_path_to_url(f.path),
- **f.front_matter,
- }
- for f in file_data_list
- if f.path != filepath
- ]
- extra_context["sibling_pages"] = sibling_pages
- folder_in_build = convert_content_path_to_directory_in_build(filepath)
-
- file_data = [
- f for f in file_data_by_directory[filepath.parent] if f.path == filepath
- ][0]
- schema_type = file_data.front_matter.get("@type")
- if not schema_type:
- raise ValueError(
- f"Required @type value missing in file or TOML front matter invalid: "
- f"{filepath}"
- )
- template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"]
- template = jinja_env.get_template(f"{schema_type}{template_extension}")
-
- # Map custom template name to Schema.org type
- if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type):
- file_data.front_matter["@type"] = mapped_schema_type
-
- # Include non-schema variables as top-level context values, removing them from
- # front_matter
- front_matter = file_data.front_matter
- schema_variables: TemplateContext = {}
- template_context: TemplateContext = {}
- non_schema_variable_prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"]
- for key, value in front_matter.items():
- if key.startswith(non_schema_variable_prefix):
- template_context[key.replace(non_schema_variable_prefix, "", 1)] = value
- continue
- schema_variables[key] = value
-
- schema_data = json.dumps(
- format_schema_data(schema_variables),
- default=json_converter_with_dates,
- )
-
- validate_front_matter_as_schema(filepath, front_matter, warning_console)
-
- schema_type_tag = f''
-
- template_context = {
- "body": file_data.body,
- "filepath": filepath,
- "schema_data": schema_data,
- "schema_type_tag": schema_type_tag,
- "open_graph_tags": open_graph_meta_tags(file_data.front_matter),
- "build_path": folder_in_build,
- "file_data_by_directory": {
- str(path): data for path, data in deepcopy(file_data_by_directory).items()
- },
- "settings": deepcopy(SETTINGS),
- **schema_variables,
- **deepcopy(extra_context),
- **template_context,
- }
-
- html = template.render(dataclasses=dataclasses, **template_context)
- for html_plugin in discovered_html_plugins:
- try:
- html = html_plugin.load()(html, template_context, release)
- except Exception as err:
- print(f"Error initializing plugin {html_plugin}: {err}")
-
- # Write file
- write_index_file_creating_path(folder_in_build, html)
-
-
-def gather_file_data_by_directory() -> DirectoryFileData:
- # Sort file data by publishedDate/createdDate, descending, if present
- file_data_by_directory: DirectoryFileData = {}
- content_directory = get_content_directory()
-
- markdown_future_to_path: dict[concurrent.futures.Future, Path] = {}
- with concurrent.futures.ProcessPoolExecutor() as executor:
- for filepath in content_directory.rglob("*.md"):
- # Extract filepath for storing context data and writing out
- relative_filepath = filepath.relative_to(content_directory)
-
- # Convert Markdown file to HTML
- future = executor.submit(convert_markdown_file_to_html, filepath)
- markdown_future_to_path[future] = relative_filepath
-
- for future in concurrent.futures.as_completed(markdown_future_to_path):
- body, front_matter = future.result()
- relative_filepath = markdown_future_to_path[future]
- if exception := future.exception():
- print(
- f"{relative_filepath}: Could not convert file to HTML - {exception}"
- )
- file_data = MarkdownFileData(
- body=body,
- front_matter=front_matter,
- path=relative_filepath,
- )
- parent_directory = relative_filepath.parent
- try:
- file_data_by_directory[parent_directory].append(file_data)
- except KeyError:
- file_data_by_directory[parent_directory] = [file_data]
-
- concurrent.futures.wait(markdown_future_to_path)
-
- return sort_directory_file_data_by_date(file_data_by_directory)
-
-
@app.command(name="clean")
def clean_command():
clean_build_directory()
@@ -358,33 +169,14 @@ def runserver():
jinja_env = get_jinja_env()
- def handle_changed_jinja_files(filepaths: list[str]):
- file_data_by_directory = gather_file_data_by_directory()
- for filepath in filepaths:
- process_jinja_file(
- Path.cwd() / filepath,
- jinja_env,
- file_data_by_directory,
- )
-
- def handle_changed_markdown_files(filepaths: list[str]):
- file_data_by_directory = gather_file_data_by_directory()
- content_directory = get_content_directory()
- for filepath in filepaths:
- write_html_file(
- filepath=(Path.cwd() / filepath).relative_to(content_directory),
- file_data_by_directory=file_data_by_directory,
- release=False,
- jinja_env=jinja_env,
- )
-
livereload_server = Server()
livereload_server.watch(
- f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja", handle_changed_jinja_files
+ f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja",
+ lambda filepaths: handle_changed_jinja_files(filepaths, jinja_env),
)
livereload_server.watch(
f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.md",
- handle_changed_markdown_files,
+ lambda filepaths: handle_changed_markdown_files(filepaths, jinja_env),
)
livereload_server.watch(
f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*",
@@ -399,7 +191,7 @@ def handle_changed_markdown_files(filepaths: list[str]):
)
livereload_server.watch(
f"{SETTINGS['TEMPLATES_DIRECTORY_NAME']}/**/*",
- lambda: event_loop.create_task(build(release=False)),
+ lambda: rebuild_markdown_files(jinja_env),
ignore=lambda filepath: Path(filepath).is_dir(),
)
livereload_server.watch(
diff --git a/blurry/file_processors/__init__.py b/blurry/file_processors/__init__.py
new file mode 100644
index 0000000..e43cc75
--- /dev/null
+++ b/blurry/file_processors/__init__.py
@@ -0,0 +1,166 @@
+import asyncio
+import dataclasses
+import json
+import mimetypes
+import shutil
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from jinja2 import Environment
+from rich.console import Console
+
+from blurry.images import generate_images_for_srcset
+from blurry.open_graph import open_graph_meta_tags
+from blurry.plugins import discovered_html_plugins
+from blurry.schema_validation import validate_front_matter_as_schema
+from blurry.settings import get_build_directory
+from blurry.settings import get_content_directory
+from blurry.settings import SETTINGS
+from blurry.types import MarkdownFileData
+from blurry.types import TemplateContext
+from blurry.utils import content_path_to_url
+from blurry.utils import convert_content_path_to_directory_in_build
+from blurry.utils import format_schema_data
+from blurry.utils import write_index_file_creating_path
+
+# Bold-yellow stderr console, passed to front matter schema validation below
+warning_console = Console(stderr=True, style="bold yellow")
+
+
+def process_non_markdown_file(
+    filepath: Path, file_data_by_directory, jinja_env: Environment
+):
+    """Render a Jinja content file, or copy any other file into the build directory.
+
+    JPEG and PNG files additionally have their srcset images generated.
+    """
+    if ".jinja" in filepath.suffixes:
+        process_jinja_file(filepath, jinja_env, file_data_by_directory)
+        return
+
+    content_directory = get_content_directory()
+    mimetype, _ = mimetypes.guess_type(filepath, strict=False)
+    relative_path = filepath.relative_to(content_directory)
+    destination = get_build_directory() / relative_path
+    # A copy that already exists in the build directory is left as it is
+    if not destination.exists():
+        destination.parent.mkdir(exist_ok=True, parents=True)
+        shutil.copyfile(filepath, destination)
+
+    srcset_mimetypes = (mimetypes.types_map[".jpg"], mimetypes.types_map[".png"])
+    if mimetype in srcset_mimetypes:
+        # Run the coroutine to completion from this synchronous function
+        asyncio.run(generate_images_for_srcset(filepath))
+
+
+def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory):
+    """Render ``filepath`` with Jinja and write it to the matching build location.
+
+    A trailing ``.jinja`` suffix is dropped from the output name, and missing
+    output directories are created before the file is written.
+    """
+    relative_filepath = filepath.relative_to(get_content_directory())
+    template = jinja_env.get_template(str(relative_filepath))
+    data_by_directory = deepcopy(file_data_by_directory)
+    context = {
+        "file_data_by_directory": {str(k): v for k, v in data_by_directory.items()},
+        "settings": deepcopy(SETTINGS),
+        "datetime": datetime,
+    }
+    output_name = relative_filepath.with_suffix(filepath.suffix.replace(".jinja", ""))
+    filepath_in_build = get_build_directory() / output_name
+    html = template.render(dataclasses=dataclasses, **context)
+    filepath_in_build.parent.mkdir(exist_ok=True, parents=True)
+    filepath_in_build.write_text(html)
+
+
+def json_converter_with_dates(item: Any) -> None | str:
+    """Format datetimes as ``YYYY-MM-DD`` for ``json.dumps``; return None otherwise."""
+    return item.strftime("%Y-%m-%d") if isinstance(item, datetime) else None
+
+
+def write_html_file(
+    filepath: Path,
+    file_data_by_directory: dict[Path, list[MarkdownFileData]],
+    release: bool,
+    jinja_env: Environment,
+):
+    """Render one Markdown file's HTML page and write it into the build directory.
+
+    ``filepath`` is looked up in ``file_data_by_directory`` by its parent
+    directory and compared against each entry's ``path``. Raises ``ValueError``
+    when the file's front matter has no ``@type``.
+    """
+    extra_context: TemplateContext = {}
+    # Index pages also get the other pages of their directory as sibling_pages
+    file_data_list = file_data_by_directory[filepath.parent]
+    if filepath.name == "index.md":
+        sibling_pages = [
+            {
+                "url": content_path_to_url(f.path),
+                **f.front_matter,
+            }
+            for f in file_data_list
+            if f.path != filepath
+        ]
+        extra_context["sibling_pages"] = sibling_pages
+    folder_in_build = convert_content_path_to_directory_in_build(filepath)
+
+    file_data = [
+        f for f in file_data_by_directory[filepath.parent] if f.path == filepath
+    ][0]
+    schema_type = file_data.front_matter.get("@type")
+    if not schema_type:
+        raise ValueError(
+            f"Required @type value missing in file or TOML front matter invalid: "
+            f"{filepath}"
+        )
+    template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"]
+    template = jinja_env.get_template(f"{schema_type}{template_extension}")
+    # Map custom template name to Schema.org type (after the template lookup above)
+    if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type):
+        file_data.front_matter["@type"] = mapped_schema_type
+    # Split front matter: prefixed keys become template variables with the prefix
+    # stripped, all other keys are schema data; no key is removed from front_matter
+    front_matter = file_data.front_matter
+    schema_variables: TemplateContext = {}
+    template_context: TemplateContext = {}
+    non_schema_variable_prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"]
+    for key, value in front_matter.items():
+        if key.startswith(non_schema_variable_prefix):
+            template_context[key.replace(non_schema_variable_prefix, "", 1)] = value
+            continue
+        schema_variables[key] = value
+    schema_data = json.dumps(
+        format_schema_data(schema_variables),
+        default=json_converter_with_dates,
+    )
+    validate_front_matter_as_schema(filepath, front_matter, warning_console)
+    # NOTE(review): empty literal; presumably meant to wrap schema_data - confirm
+    schema_type_tag = f''
+    # Later entries win: schema variables, extra_context, then prefixed variables
+    template_context = {
+        "body": file_data.body,
+        "filepath": filepath,
+        "schema_data": schema_data,
+        "schema_type_tag": schema_type_tag,
+        "open_graph_tags": open_graph_meta_tags(file_data.front_matter),
+        "build_path": folder_in_build,
+        "file_data_by_directory": {
+            str(path): data for path, data in deepcopy(file_data_by_directory).items()
+        },
+        "settings": deepcopy(SETTINGS),
+        **schema_variables,
+        **deepcopy(extra_context),
+        **template_context,
+    }
+
+    html = template.render(dataclasses=dataclasses, **template_context)
+    for html_plugin in discovered_html_plugins:
+        try:
+            html = html_plugin.load()(html, template_context, release)
+        except Exception as err:
+            print(f"Error initializing plugin {html_plugin}: {err}")
+    write_index_file_creating_path(folder_in_build, html)
diff --git a/blurry/gather_file_data_by_directory.py b/blurry/gather_file_data_by_directory.py
new file mode 100644
index 0000000..845e983
--- /dev/null
+++ b/blurry/gather_file_data_by_directory.py
@@ -0,0 +1,60 @@
+import concurrent.futures
+from pathlib import Path
+
+from blurry.markdown import convert_markdown_file_to_html
+from blurry.settings import get_content_directory
+from blurry.types import DirectoryFileData
+from blurry.types import MarkdownFileData
+
+
+def sort_directory_file_data_by_date(
+    directory_file_data: DirectoryFileData,
+) -> DirectoryFileData:
+    """Sort every directory's pages in place, newest first, and return the mapping."""
+
+    def date_key(page) -> str:
+        dates = page.front_matter
+        published = str(dates.get("datePublished", ""))
+        return published or str(dates.get("dateCreated", "")) or "0000-00-00"
+
+    for pages in directory_file_data.values():
+        pages.sort(key=date_key, reverse=True)
+    return directory_file_data
+
+
+def gather_file_data_by_directory() -> DirectoryFileData:
+    """Convert all Markdown content files and group the results by directory.
+
+    Conversion runs in a process pool. A file whose conversion raises is
+    reported and skipped rather than aborting the whole run. Keys are
+    directories relative to the content directory; each list is sorted by
+    ``sort_directory_file_data_by_date``.
+    """
+    file_data_by_directory: DirectoryFileData = {}
+    content_directory = get_content_directory()
+
+    markdown_future_to_path: dict[concurrent.futures.Future, Path] = {}
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        for filepath in content_directory.rglob("*.md"):
+            # Paths are stored relative to the content directory
+            relative_filepath = filepath.relative_to(content_directory)
+            # Convert Markdown file to HTML in a worker process
+            future = executor.submit(convert_markdown_file_to_html, filepath)
+            markdown_future_to_path[future] = relative_filepath
+
+        for future in concurrent.futures.as_completed(markdown_future_to_path):
+            relative_filepath = markdown_future_to_path[future]
+            # Check for failure first: future.result() would re-raise it
+            if exception := future.exception():
+                print(
+                    f"{relative_filepath}: Could not convert file to HTML - {exception}"
+                )
+                continue
+            body, front_matter = future.result()
+            file_data = MarkdownFileData(
+                body=body, front_matter=front_matter, path=relative_filepath
+            )
+            parent_directory = relative_filepath.parent
+            file_data_by_directory.setdefault(parent_directory, []).append(file_data)
+
+    return sort_directory_file_data_by_date(file_data_by_directory)
diff --git a/blurry/runserver_handlers.py b/blurry/runserver_handlers.py
new file mode 100644
index 0000000..d7df130
--- /dev/null
+++ b/blurry/runserver_handlers.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+
+from jinja2 import Environment
+
+from blurry.file_processors import process_jinja_file
+from blurry.file_processors import write_html_file
+from blurry.gather_file_data_by_directory import gather_file_data_by_directory
+from blurry.settings import get_content_directory
+
+
+def handle_changed_jinja_files(filepaths: list[str], jinja_env: Environment):
+    """Re-render each changed Jinja content file using freshly gathered file data."""
+    file_data_by_directory = gather_file_data_by_directory()
+
+    for changed_path in filepaths:
+        # Changed paths are joined onto the current working directory
+        absolute_path = Path.cwd() / changed_path
+        process_jinja_file(absolute_path, jinja_env, file_data_by_directory)
+
+
+def handle_changed_markdown_files(filepaths: list[str], jinja_env: Environment):
+    """Rebuild the HTML page for each changed Markdown file (non-release mode)."""
+    file_data_by_directory = gather_file_data_by_directory()
+    content_directory = get_content_directory()
+    for changed_path in filepaths:
+        # write_html_file is given the path relative to the content directory
+        relative_path = (Path.cwd() / changed_path).relative_to(content_directory)
+        write_html_file(
+            relative_path, file_data_by_directory, release=False, jinja_env=jinja_env
+        )
+
+
+def rebuild_markdown_files(jinja_env: Environment):
+    """Rebuild the HTML page for every Markdown file under the content directory."""
+    content_directory = get_content_directory()
+    markdown_files = list(map(str, content_directory.rglob("*.md")))
+    handle_changed_markdown_files(markdown_files, jinja_env)
diff --git a/blurry/utils.py b/blurry/utils.py
index 5d4bebf..736211b 100644
--- a/blurry/utils.py
+++ b/blurry/utils.py
@@ -3,7 +3,6 @@
from blurry.settings import get_build_directory
from blurry.settings import get_content_directory
from blurry.settings import SETTINGS
-from blurry.types import DirectoryFileData
def get_domain_with_scheme():
@@ -95,21 +94,6 @@ def build_path_to_url(path: Path) -> str:
return f"{get_domain_with_scheme()}{pathname}"
-def sort_directory_file_data_by_date(
- directory_file_data: DirectoryFileData,
-) -> DirectoryFileData:
- for path, file_data in directory_file_data.items():
- file_data.sort(
- key=lambda page: str(page.front_matter.get("datePublished", ""))
- or str(page.front_matter.get("dateCreated", ""))
- or "0000-00-00",
- reverse=True,
- )
- directory_file_data[path] = file_data
-
- return directory_file_data
-
-
def format_schema_data(schema_data: dict) -> dict:
formatted_schema_data = {"@context": "https://schema.org"}
formatted_schema_data.update(schema_data)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c475ad6..2cbbc07 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,6 +4,7 @@
import pytest
+from blurry.gather_file_data_by_directory import sort_directory_file_data_by_date
from blurry.settings import get_build_directory
from blurry.settings import get_content_directory
from blurry.settings import SETTINGS
@@ -14,7 +15,6 @@
from blurry.utils import format_schema_data
from blurry.utils import get_domain_with_scheme
from blurry.utils import path_to_url_pathname
-from blurry.utils import sort_directory_file_data_by_date
BUILD_DIR = get_build_directory()