From 0f1f16377732e598bb9edf2ab4a945396367c84f Mon Sep 17 00:00:00 2001 From: John Franey <1728528+johnfraney@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:10:16 -0400 Subject: [PATCH] refactor: move functions out of top init file --- blurry/__init__.py | 228 ++---------------------- blurry/file_processors/__init__.py | 166 +++++++++++++++++ blurry/gather_file_data_by_directory.py | 60 +++++++ blurry/runserver_handlers.py | 37 ++++ blurry/utils.py | 16 -- tests/test_utils.py | 2 +- 6 files changed, 274 insertions(+), 235 deletions(-) create mode 100644 blurry/file_processors/__init__.py create mode 100644 blurry/gather_file_data_by_directory.py create mode 100644 blurry/runserver_handlers.py diff --git a/blurry/__init__.py b/blurry/__init__.py index cccf180..23eed9d 100644 --- a/blurry/__init__.py +++ b/blurry/__init__.py @@ -1,19 +1,12 @@ import asyncio import concurrent.futures -import dataclasses import importlib -import json -import mimetypes import os import pkgutil -import shutil import sys -from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Any -from jinja2 import Environment from livereload import Server from rich import print from rich.console import Console @@ -28,30 +21,18 @@ from blurry.commands.clean import clean_build_directory from blurry.commands.init import initialize_new_project from blurry.constants import ENV_VAR_PREFIX -from blurry.images import generate_images_for_srcset -from blurry.markdown import convert_markdown_file_to_html -from blurry.open_graph import open_graph_meta_tags -from blurry.plugins import discovered_html_plugins -from blurry.schema_validation import validate_front_matter_as_schema +from blurry.file_processors import process_non_markdown_file +from blurry.file_processors import write_html_file +from blurry.gather_file_data_by_directory import gather_file_data_by_directory +from blurry.runserver_handlers import handle_changed_jinja_files +from blurry.runserver_handlers import handle_changed_markdown_files +from blurry.runserver_handlers import rebuild_markdown_files from blurry.settings import get_build_directory from blurry.settings import get_content_directory from blurry.settings import SETTINGS from blurry.settings import update_settings from blurry.sitemap import write_sitemap_file from blurry.templates import get_jinja_env -from blurry.types import DirectoryFileData -from blurry.types import MarkdownFileData -from blurry.types import TemplateContext -from blurry.utils import content_path_to_url -from blurry.utils import convert_content_path_to_directory_in_build -from blurry.utils import format_schema_data -from blurry.utils import sort_directory_file_data_by_date -from blurry.utils import write_index_file_creating_path - - -def json_converter_with_dates(item: Any) -> None | str: - if isinstance(item, datetime): - return item.strftime("%Y-%M-%D") warning_console = Console(stderr=True, style="bold yellow") @@ -60,176 +41,6 @@ def json_converter_with_dates(item: Any) -> None | str: app = AsyncTyper() -def process_non_markdown_file( - filepath: Path, file_data_by_directory, jinja_env: Environment -): - # Process Jinja files - if ".jinja" in filepath.suffixes: - process_jinja_file(filepath, jinja_env, file_data_by_directory) - return - - CONTENT_DIR = get_content_directory() - mimetype, _ = mimetypes.guess_type(filepath, strict=False) - relative_filepath = filepath.relative_to(CONTENT_DIR) - output_file = get_build_directory() / relative_filepath - - # Copy file to build directory if it is not already there - if not output_file.exists(): - output_file.parent.mkdir(exist_ok=True, parents=True) - shutil.copyfile(filepath, output_file) - - # Create srcset images - if mimetype in [ - mimetypes.types_map[".jpg"], - mimetypes.types_map[".png"], - ]: - asyncio.run(generate_images_for_srcset(filepath)) - - -def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory): - build_directory = get_build_directory() - content_directory = get_content_directory() - template = jinja_env.get_template(str(filepath.relative_to(content_directory))) - context = { - "file_data_by_directory": { - str(path): data for path, data in deepcopy(file_data_by_directory).items() - }, - "settings": deepcopy(SETTINGS), - "datetime": datetime, - } - filepath_with_new_extension = filepath.with_suffix( - filepath.suffix.replace(".jinja", "") - ) - filepath_in_build = build_directory / filepath_with_new_extension.relative_to( - content_directory - ) - html = template.render(dataclasses=dataclasses, **context) - filepath_in_build.write_text(html) - - -def write_html_file( - filepath: Path, - file_data_by_directory: dict[Path, list[MarkdownFileData]], - release: bool, - jinja_env: Environment, -): - extra_context: TemplateContext = {} - # Gather data from other files in this directory if this is an index file - file_data_list = file_data_by_directory[filepath.parent] - if filepath.name == "index.md": - sibling_pages = [ - { - "url": content_path_to_url(f.path), - **f.front_matter, - } - for f in file_data_list - if f.path != filepath - ] - extra_context["sibling_pages"] = sibling_pages - folder_in_build = convert_content_path_to_directory_in_build(filepath) - - file_data = [ - f for f in file_data_by_directory[filepath.parent] if f.path == filepath - ][0] - schema_type = file_data.front_matter.get("@type") - if not schema_type: - raise ValueError( - f"Required @type value missing in file or TOML front matter invalid: " - f"{filepath}" - ) - template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"] - template = jinja_env.get_template(f"{schema_type}{template_extension}") - - # Map custom template name to Schema.org type - if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type): - file_data.front_matter["@type"] = mapped_schema_type - - # Include non-schema variables as top-level context values, removing them from - # front_matter - front_matter = file_data.front_matter - schema_variables: TemplateContext = {} - template_context: TemplateContext = {} - non_schema_variable_prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"] - for key, value in front_matter.items(): - if key.startswith(non_schema_variable_prefix): - template_context[key.replace(non_schema_variable_prefix, "", 1)] = value - continue - schema_variables[key] = value - - schema_data = json.dumps( - format_schema_data(schema_variables), - default=json_converter_with_dates, - ) - - validate_front_matter_as_schema(filepath, front_matter, warning_console) - - schema_type_tag = f'' - - template_context = { - "body": file_data.body, - "filepath": filepath, - "schema_data": schema_data, - "schema_type_tag": schema_type_tag, - "open_graph_tags": open_graph_meta_tags(file_data.front_matter), - "build_path": folder_in_build, - "file_data_by_directory": { - str(path): data for path, data in deepcopy(file_data_by_directory).items() - }, - "settings": deepcopy(SETTINGS), - **schema_variables, - **deepcopy(extra_context), - **template_context, - } - - html = template.render(dataclasses=dataclasses, **template_context) - for html_plugin in discovered_html_plugins: - try: - html = html_plugin.load()(html, template_context, release) - except Exception as err: - print(f"Error initializing plugin {html_plugin}: {err}") - - # Write file - write_index_file_creating_path(folder_in_build, html) - - -def gather_file_data_by_directory() -> DirectoryFileData: - # Sort file data by publishedDate/createdDate, descending, if present - file_data_by_directory: DirectoryFileData = {} - content_directory = get_content_directory() - - markdown_future_to_path: dict[concurrent.futures.Future, Path] = {} - with concurrent.futures.ProcessPoolExecutor() as executor: - for filepath in content_directory.rglob("*.md"): - # Extract filepath for storing context data and writing out - relative_filepath = filepath.relative_to(content_directory) - - # Convert Markdown file to HTML - future = executor.submit(convert_markdown_file_to_html, filepath) - markdown_future_to_path[future] = relative_filepath - - for future in concurrent.futures.as_completed(markdown_future_to_path): - body, front_matter = future.result() - relative_filepath = markdown_future_to_path[future] - if exception := future.exception(): - print( - f"{relative_filepath}: Could not convert file to HTML - {exception}" - ) - file_data = MarkdownFileData( - body=body, - front_matter=front_matter, - path=relative_filepath, - ) - parent_directory = relative_filepath.parent - try: - file_data_by_directory[parent_directory].append(file_data) - except KeyError: - file_data_by_directory[parent_directory] = [file_data] - - concurrent.futures.wait(markdown_future_to_path) - - return sort_directory_file_data_by_date(file_data_by_directory) - - @app.command(name="clean") def clean_command(): clean_build_directory() @@ -358,33 +169,14 @@ def runserver(): jinja_env = get_jinja_env() - def handle_changed_jinja_files(filepaths: list[str]): - file_data_by_directory = gather_file_data_by_directory() - for filepath in filepaths: - process_jinja_file( - Path.cwd() / filepath, - jinja_env, - file_data_by_directory, - ) - - def handle_changed_markdown_files(filepaths: list[str]): - file_data_by_directory = gather_file_data_by_directory() - content_directory = get_content_directory() - for filepath in filepaths: - write_html_file( - filepath=(Path.cwd() / filepath).relative_to(content_directory), - file_data_by_directory=file_data_by_directory, - release=False, - jinja_env=jinja_env, - ) - livereload_server = Server() livereload_server.watch( - f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja", handle_changed_jinja_files + f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja", + lambda filepaths: handle_changed_jinja_files(filepaths, jinja_env), ) livereload_server.watch( f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.md", - handle_changed_markdown_files, + lambda filepaths: handle_changed_markdown_files(filepaths, jinja_env), ) livereload_server.watch( f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*", @@ -399,7 +191,7 @@ def handle_changed_markdown_files(filepaths: list[str]): ) livereload_server.watch( f"{SETTINGS['TEMPLATES_DIRECTORY_NAME']}/**/*", - lambda: event_loop.create_task(build(release=False)), + lambda: rebuild_markdown_files(jinja_env), ignore=lambda filepath: Path(filepath).is_dir(), ) livereload_server.watch( diff --git a/blurry/file_processors/__init__.py b/blurry/file_processors/__init__.py new file mode 100644 index 0000000..e43cc75 --- /dev/null +++ b/blurry/file_processors/__init__.py @@ -0,0 +1,166 @@ +import asyncio +import dataclasses +import json +import mimetypes +import shutil +from copy import deepcopy +from datetime import datetime +from pathlib import Path +from typing import Any + +from jinja2 import Environment +from rich.console import Console + +from blurry.images import generate_images_for_srcset +from blurry.open_graph import open_graph_meta_tags +from blurry.plugins import discovered_html_plugins +from blurry.schema_validation import validate_front_matter_as_schema +from blurry.settings import get_build_directory +from blurry.settings import get_content_directory +from blurry.settings import SETTINGS +from blurry.types import MarkdownFileData +from blurry.types import TemplateContext +from blurry.utils import content_path_to_url +from blurry.utils import convert_content_path_to_directory_in_build +from blurry.utils import format_schema_data +from blurry.utils import write_index_file_creating_path + + +warning_console = Console(stderr=True, style="bold yellow") + + +def process_non_markdown_file( + filepath: Path, file_data_by_directory, jinja_env: Environment +): + # Process Jinja files + if ".jinja" in filepath.suffixes: + process_jinja_file(filepath, jinja_env, file_data_by_directory) + return + + CONTENT_DIR = get_content_directory() + mimetype, _ = mimetypes.guess_type(filepath, strict=False) + relative_filepath = filepath.relative_to(CONTENT_DIR) + output_file = get_build_directory() / relative_filepath + + # Copy file to build directory if it is not already there + if not output_file.exists(): + output_file.parent.mkdir(exist_ok=True, parents=True) + shutil.copyfile(filepath, output_file) + + # Create srcset images + if mimetype in [ + mimetypes.types_map[".jpg"], + mimetypes.types_map[".png"], + ]: + asyncio.run(generate_images_for_srcset(filepath)) + + +def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory): + build_directory = get_build_directory() + content_directory = get_content_directory() + template = jinja_env.get_template(str(filepath.relative_to(content_directory))) + context = { + "file_data_by_directory": { + str(path): data for path, data in deepcopy(file_data_by_directory).items() + }, + "settings": deepcopy(SETTINGS), + "datetime": datetime, + } + filepath_with_new_extension = filepath.with_suffix( + filepath.suffix.replace(".jinja", "") + ) + filepath_in_build = build_directory / filepath_with_new_extension.relative_to( + content_directory + ) + html = template.render(dataclasses=dataclasses, **context) + filepath_in_build.write_text(html) + + +def json_converter_with_dates(item: Any) -> None | str: + if isinstance(item, datetime): + return item.strftime("%Y-%M-%D") + + +def write_html_file( + filepath: Path, + file_data_by_directory: dict[Path, list[MarkdownFileData]], + release: bool, + jinja_env: Environment, +): + extra_context: TemplateContext = {} + # Gather data from other files in this directory if this is an index file + file_data_list = file_data_by_directory[filepath.parent] + if filepath.name == "index.md": + sibling_pages = [ + { + "url": content_path_to_url(f.path), + **f.front_matter, + } + for f in file_data_list + if f.path != filepath + ] + extra_context["sibling_pages"] = sibling_pages + folder_in_build = convert_content_path_to_directory_in_build(filepath) + + file_data = [ + f for f in file_data_by_directory[filepath.parent] if f.path == filepath + ][0] + schema_type = file_data.front_matter.get("@type") + if not schema_type: + raise ValueError( + f"Required @type value missing in file or TOML front matter invalid: " + f"{filepath}" + ) + template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"] + template = jinja_env.get_template(f"{schema_type}{template_extension}") + + # Map custom template name to Schema.org type + if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type): + file_data.front_matter["@type"] = mapped_schema_type + + # Include non-schema variables as top-level context values, removing them from + # front_matter + front_matter = file_data.front_matter + schema_variables: TemplateContext = {} + template_context: TemplateContext = {} + non_schema_variable_prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"] + for key, value in front_matter.items(): + if key.startswith(non_schema_variable_prefix): + template_context[key.replace(non_schema_variable_prefix, "", 1)] = value + continue + schema_variables[key] = value + + schema_data = json.dumps( + format_schema_data(schema_variables), + default=json_converter_with_dates, + ) + + validate_front_matter_as_schema(filepath, front_matter, warning_console) + + schema_type_tag = f'' + + template_context = { + "body": file_data.body, + "filepath": filepath, + "schema_data": schema_data, + "schema_type_tag": schema_type_tag, + "open_graph_tags": open_graph_meta_tags(file_data.front_matter), + "build_path": folder_in_build, + "file_data_by_directory": { + str(path): data for path, data in deepcopy(file_data_by_directory).items() + }, + "settings": deepcopy(SETTINGS), + **schema_variables, + **deepcopy(extra_context), + **template_context, + } + + html = template.render(dataclasses=dataclasses, **template_context) + for html_plugin in discovered_html_plugins: + try: + html = html_plugin.load()(html, template_context, release) + except Exception as err: + print(f"Error initializing plugin {html_plugin}: {err}") + + # Write file + write_index_file_creating_path(folder_in_build, html) diff --git a/blurry/gather_file_data_by_directory.py b/blurry/gather_file_data_by_directory.py new file mode 100644 index 0000000..845e983 --- /dev/null +++ b/blurry/gather_file_data_by_directory.py @@ -0,0 +1,60 @@ +import concurrent.futures +from pathlib import Path + +from blurry.markdown import convert_markdown_file_to_html +from blurry.settings import get_content_directory +from blurry.types import DirectoryFileData +from blurry.types import MarkdownFileData + + +def sort_directory_file_data_by_date( + directory_file_data: DirectoryFileData, +) -> DirectoryFileData: + for path, file_data in directory_file_data.items(): + file_data.sort( + key=lambda page: str(page.front_matter.get("datePublished", "")) + or str(page.front_matter.get("dateCreated", "")) + or "0000-00-00", + reverse=True, + ) + directory_file_data[path] = file_data + + return directory_file_data + + +def gather_file_data_by_directory() -> DirectoryFileData: + # Sort file data by publishedDate/createdDate, descending, if present + file_data_by_directory: DirectoryFileData = {} + content_directory = get_content_directory() + + markdown_future_to_path: dict[concurrent.futures.Future, Path] = {} + with concurrent.futures.ProcessPoolExecutor() as executor: + for filepath in content_directory.rglob("*.md"): + # Extract filepath for storing context data and writing out + relative_filepath = filepath.relative_to(content_directory) + + # Convert Markdown file to HTML + future = executor.submit(convert_markdown_file_to_html, filepath) + markdown_future_to_path[future] = relative_filepath + + for future in concurrent.futures.as_completed(markdown_future_to_path): + body, front_matter = future.result() + relative_filepath = markdown_future_to_path[future] + if exception := future.exception(): + print( + f"{relative_filepath}: Could not convert file to HTML - {exception}" + ) + file_data = MarkdownFileData( + body=body, + front_matter=front_matter, + path=relative_filepath, + ) + parent_directory = relative_filepath.parent + try: + file_data_by_directory[parent_directory].append(file_data) + except KeyError: + file_data_by_directory[parent_directory] = [file_data] + + concurrent.futures.wait(markdown_future_to_path) + + return sort_directory_file_data_by_date(file_data_by_directory) diff --git a/blurry/runserver_handlers.py b/blurry/runserver_handlers.py new file mode 100644 index 0000000..d7df130 --- /dev/null +++ b/blurry/runserver_handlers.py @@ -0,0 +1,37 @@ +from pathlib import Path + +from jinja2 import Environment + +from blurry.file_processors import process_jinja_file +from blurry.file_processors import write_html_file +from blurry.gather_file_data_by_directory import gather_file_data_by_directory +from blurry.settings import get_content_directory + + +def handle_changed_jinja_files(filepaths: list[str], jinja_env: Environment): + file_data_by_directory = gather_file_data_by_directory() + for filepath in filepaths: + process_jinja_file( + Path.cwd() / filepath, + jinja_env, + file_data_by_directory, + ) + + +def handle_changed_markdown_files(filepaths: list[str], jinja_env: Environment): + file_data_by_directory = gather_file_data_by_directory() + content_directory = get_content_directory() + for filepath in filepaths: + write_html_file( + filepath=(Path.cwd() / filepath).relative_to(content_directory), + file_data_by_directory=file_data_by_directory, + release=False, + jinja_env=jinja_env, + ) + + +def rebuild_markdown_files(jinja_env: Environment): + content_directory = get_content_directory() + markdown_paths = content_directory.rglob("*.md") + markdown_files = [str(p) for p in markdown_paths] + handle_changed_markdown_files(markdown_files, jinja_env) diff --git a/blurry/utils.py b/blurry/utils.py index 5d4bebf..736211b 100644 --- a/blurry/utils.py +++ b/blurry/utils.py @@ -3,7 +3,6 @@ from blurry.settings import get_build_directory from blurry.settings import get_content_directory from blurry.settings import SETTINGS -from blurry.types import DirectoryFileData def get_domain_with_scheme(): @@ -95,21 +94,6 @@ def build_path_to_url(path: Path) -> str: return f"{get_domain_with_scheme()}{pathname}" -def sort_directory_file_data_by_date( - directory_file_data: DirectoryFileData, -) -> DirectoryFileData: - for path, file_data in directory_file_data.items(): - file_data.sort( - key=lambda page: str(page.front_matter.get("datePublished", "")) - or str(page.front_matter.get("dateCreated", "")) - or "0000-00-00", - reverse=True, - ) - directory_file_data[path] = file_data - - return directory_file_data - - def format_schema_data(schema_data: dict) -> dict: formatted_schema_data = {"@context": "https://schema.org"} formatted_schema_data.update(schema_data) diff --git a/tests/test_utils.py b/tests/test_utils.py index c475ad6..2cbbc07 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,6 +4,7 @@ import pytest +from blurry.gather_file_data_by_directory import sort_directory_file_data_by_date from blurry.settings import get_build_directory from blurry.settings import get_content_directory from blurry.settings import SETTINGS @@ -14,7 +15,6 @@ from blurry.utils import format_schema_data from blurry.utils import get_domain_with_scheme from blurry.utils import path_to_url_pathname -from blurry.utils import sort_directory_file_data_by_date BUILD_DIR = get_build_directory()