From 0f1f16377732e598bb9edf2ab4a945396367c84f Mon Sep 17 00:00:00 2001
From: John Franey <1728528+johnfraney@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:10:16 -0400
Subject: [PATCH] refactor: move functions out of top init file

---
 blurry/__init__.py                      | 228 ++----------------------
 blurry/file_processors/__init__.py      | 166 +++++++++++++++++
 blurry/gather_file_data_by_directory.py |  60 +++++++
 blurry/runserver_handlers.py            |  37 ++++
 blurry/utils.py                         |  16 --
 tests/test_utils.py                     |   2 +-
 6 files changed, 274 insertions(+), 235 deletions(-)
 create mode 100644 blurry/file_processors/__init__.py
 create mode 100644 blurry/gather_file_data_by_directory.py
 create mode 100644 blurry/runserver_handlers.py

diff --git a/blurry/__init__.py b/blurry/__init__.py
index cccf180..23eed9d 100644
--- a/blurry/__init__.py
+++ b/blurry/__init__.py
@@ -1,19 +1,12 @@
 import asyncio
 import concurrent.futures
-import dataclasses
 import importlib
-import json
-import mimetypes
 import os
 import pkgutil
-import shutil
 import sys
-from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
-from typing import Any
 
-from jinja2 import Environment
 from livereload import Server
 from rich import print
 from rich.console import Console
@@ -28,30 +21,18 @@
 from blurry.commands.clean import clean_build_directory
 from blurry.commands.init import initialize_new_project
 from blurry.constants import ENV_VAR_PREFIX
-from blurry.images import generate_images_for_srcset
-from blurry.markdown import convert_markdown_file_to_html
-from blurry.open_graph import open_graph_meta_tags
-from blurry.plugins import discovered_html_plugins
-from blurry.schema_validation import validate_front_matter_as_schema
+from blurry.file_processors import process_non_markdown_file
+from blurry.file_processors import write_html_file
+from blurry.gather_file_data_by_directory import gather_file_data_by_directory
+from blurry.runserver_handlers import handle_changed_jinja_files
+from blurry.runserver_handlers import handle_changed_markdown_files
+from blurry.runserver_handlers import rebuild_markdown_files
 from blurry.settings import get_build_directory
 from blurry.settings import get_content_directory
 from blurry.settings import SETTINGS
 from blurry.settings import update_settings
 from blurry.sitemap import write_sitemap_file
 from blurry.templates import get_jinja_env
-from blurry.types import DirectoryFileData
-from blurry.types import MarkdownFileData
-from blurry.types import TemplateContext
-from blurry.utils import content_path_to_url
-from blurry.utils import convert_content_path_to_directory_in_build
-from blurry.utils import format_schema_data
-from blurry.utils import sort_directory_file_data_by_date
-from blurry.utils import write_index_file_creating_path
-
-
-def json_converter_with_dates(item: Any) -> None | str:
-    if isinstance(item, datetime):
-        return item.strftime("%Y-%M-%D")
 
 
 warning_console = Console(stderr=True, style="bold yellow")
@@ -60,176 +41,6 @@ def json_converter_with_dates(item: Any) -> None | str:
 app = AsyncTyper()
 
 
-def process_non_markdown_file(
-    filepath: Path, file_data_by_directory, jinja_env: Environment
-):
-    # Process Jinja files
-    if ".jinja" in filepath.suffixes:
-        process_jinja_file(filepath, jinja_env, file_data_by_directory)
-        return
-
-    CONTENT_DIR = get_content_directory()
-    mimetype, _ = mimetypes.guess_type(filepath, strict=False)
-    relative_filepath = filepath.relative_to(CONTENT_DIR)
-    output_file = get_build_directory() / relative_filepath
-
-    # Copy file to build directory if it is not already there
-    if not output_file.exists():
-        output_file.parent.mkdir(exist_ok=True, parents=True)
-        shutil.copyfile(filepath, output_file)
-
-    # Create srcset images
-    if mimetype in [
-        mimetypes.types_map[".jpg"],
-        mimetypes.types_map[".png"],
-    ]:
-        asyncio.run(generate_images_for_srcset(filepath))
-
-
-def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory):
-    build_directory = get_build_directory()
-    content_directory = get_content_directory()
-    template = jinja_env.get_template(str(filepath.relative_to(content_directory)))
-    context = {
-        "file_data_by_directory": {
-            str(path): data for path, data in deepcopy(file_data_by_directory).items()
-        },
-        "settings": deepcopy(SETTINGS),
-        "datetime": datetime,
-    }
-    filepath_with_new_extension = filepath.with_suffix(
-        filepath.suffix.replace(".jinja", "")
-    )
-    filepath_in_build = build_directory / filepath_with_new_extension.relative_to(
-        content_directory
-    )
-    html = template.render(dataclasses=dataclasses, **context)
-    filepath_in_build.write_text(html)
-
-
-def write_html_file(
-    filepath: Path,
-    file_data_by_directory: dict[Path, list[MarkdownFileData]],
-    release: bool,
-    jinja_env: Environment,
-):
-    extra_context: TemplateContext = {}
-    # Gather data from other files in this directory if this is an index file
-    file_data_list = file_data_by_directory[filepath.parent]
-    if filepath.name == "index.md":
-        sibling_pages = [
-            {
-                "url": content_path_to_url(f.path),
-                **f.front_matter,
-            }
-            for f in file_data_list
-            if f.path != filepath
-        ]
-        extra_context["sibling_pages"] = sibling_pages
-    folder_in_build = convert_content_path_to_directory_in_build(filepath)
-
-    file_data = [
-        f for f in file_data_by_directory[filepath.parent] if f.path == filepath
-    ][0]
-    schema_type = file_data.front_matter.get("@type")
-    if not schema_type:
-        raise ValueError(
-            f"Required @type value missing in file or TOML front matter invalid: "
-            f"{filepath}"
-        )
-    template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"]
-    template = jinja_env.get_template(f"{schema_type}{template_extension}")
-
-    # Map custom template name to Schema.org type
-    if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type):
-        file_data.front_matter["@type"] = mapped_schema_type
-
-    # Include non-schema variables as top-level context values, removing them from
-    # front_matter
-    front_matter = file_data.front_matter
-    schema_variables: TemplateContext = {}
-    template_context: TemplateContext = {}
-    non_schema_variable_prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"]
-    for key, value in front_matter.items():
-        if key.startswith(non_schema_variable_prefix):
-            template_context[key.replace(non_schema_variable_prefix, "", 1)] = value
-            continue
-        schema_variables[key] = value
-
-    schema_data = json.dumps(
-        format_schema_data(schema_variables),
-        default=json_converter_with_dates,
-    )
-
-    validate_front_matter_as_schema(filepath, front_matter, warning_console)
-
-    schema_type_tag = f'<script type="application/ld+json">{schema_data}</script>'
-
-    template_context = {
-        "body": file_data.body,
-        "filepath": filepath,
-        "schema_data": schema_data,
-        "schema_type_tag": schema_type_tag,
-        "open_graph_tags": open_graph_meta_tags(file_data.front_matter),
-        "build_path": folder_in_build,
-        "file_data_by_directory": {
-            str(path): data for path, data in deepcopy(file_data_by_directory).items()
-        },
-        "settings": deepcopy(SETTINGS),
-        **schema_variables,
-        **deepcopy(extra_context),
-        **template_context,
-    }
-
-    html = template.render(dataclasses=dataclasses, **template_context)
-    for html_plugin in discovered_html_plugins:
-        try:
-            html = html_plugin.load()(html, template_context, release)
-        except Exception as err:
-            print(f"Error initializing plugin {html_plugin}: {err}")
-
-    # Write file
-    write_index_file_creating_path(folder_in_build, html)
-
-
-def gather_file_data_by_directory() -> DirectoryFileData:
-    # Sort file data by publishedDate/createdDate, descending, if present
-    file_data_by_directory: DirectoryFileData = {}
-    content_directory = get_content_directory()
-
-    markdown_future_to_path: dict[concurrent.futures.Future, Path] = {}
-    with concurrent.futures.ProcessPoolExecutor() as executor:
-        for filepath in content_directory.rglob("*.md"):
-            # Extract filepath for storing context data and writing out
-            relative_filepath = filepath.relative_to(content_directory)
-
-            # Convert Markdown file to HTML
-            future = executor.submit(convert_markdown_file_to_html, filepath)
-            markdown_future_to_path[future] = relative_filepath
-
-        for future in concurrent.futures.as_completed(markdown_future_to_path):
-            body, front_matter = future.result()
-            relative_filepath = markdown_future_to_path[future]
-            if exception := future.exception():
-                print(
-                    f"{relative_filepath}: Could not convert file to HTML - {exception}"
-                )
-            file_data = MarkdownFileData(
-                body=body,
-                front_matter=front_matter,
-                path=relative_filepath,
-            )
-            parent_directory = relative_filepath.parent
-            try:
-                file_data_by_directory[parent_directory].append(file_data)
-            except KeyError:
-                file_data_by_directory[parent_directory] = [file_data]
-
-    concurrent.futures.wait(markdown_future_to_path)
-
-    return sort_directory_file_data_by_date(file_data_by_directory)
-
-
 @app.command(name="clean")
 def clean_command():
     clean_build_directory()
@@ -358,33 +169,14 @@ def runserver():
 
     jinja_env = get_jinja_env()
 
-    def handle_changed_jinja_files(filepaths: list[str]):
-        file_data_by_directory = gather_file_data_by_directory()
-        for filepath in filepaths:
-            process_jinja_file(
-                Path.cwd() / filepath,
-                jinja_env,
-                file_data_by_directory,
-            )
-
-    def handle_changed_markdown_files(filepaths: list[str]):
-        file_data_by_directory = gather_file_data_by_directory()
-        content_directory = get_content_directory()
-        for filepath in filepaths:
-            write_html_file(
-                filepath=(Path.cwd() / filepath).relative_to(content_directory),
-                file_data_by_directory=file_data_by_directory,
-                release=False,
-                jinja_env=jinja_env,
-            )
-
     livereload_server = Server()
     livereload_server.watch(
-        f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja", handle_changed_jinja_files
+        f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.jinja",
+        lambda filepaths: handle_changed_jinja_files(filepaths, jinja_env),
     )
     livereload_server.watch(
         f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*.md",
-        handle_changed_markdown_files,
+        lambda filepaths: handle_changed_markdown_files(filepaths, jinja_env),
     )
     livereload_server.watch(
         f"{SETTINGS['CONTENT_DIRECTORY_NAME']}/**/*",
@@ -399,7 +191,7 @@ def handle_changed_markdown_files(filepaths: list[str]):
     )
     livereload_server.watch(
         f"{SETTINGS['TEMPLATES_DIRECTORY_NAME']}/**/*",
-        lambda: event_loop.create_task(build(release=False)),
+        lambda: rebuild_markdown_files(jinja_env),
         ignore=lambda filepath: Path(filepath).is_dir(),
     )
     livereload_server.watch(
diff --git a/blurry/file_processors/__init__.py b/blurry/file_processors/__init__.py
new file mode 100644
index 0000000..e43cc75
--- /dev/null
+++ b/blurry/file_processors/__init__.py
@@ -0,0 +1,166 @@
+import asyncio
+import dataclasses
+import json
+import mimetypes
+import shutil
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from jinja2 import Environment
+from rich.console import Console
+
+from blurry.images import generate_images_for_srcset
+from blurry.open_graph import open_graph_meta_tags
+from blurry.plugins import discovered_html_plugins
+from blurry.schema_validation import validate_front_matter_as_schema
+from blurry.settings import get_build_directory
+from blurry.settings import get_content_directory
+from blurry.settings import SETTINGS
+from blurry.types import MarkdownFileData
+from blurry.types import TemplateContext
+from blurry.utils import content_path_to_url
+from blurry.utils import convert_content_path_to_directory_in_build
+from blurry.utils import format_schema_data
+from blurry.utils import write_index_file_creating_path
+
+
+warning_console = Console(stderr=True, style="bold yellow")  # for schema validation
+
+
+def process_non_markdown_file(
+    filepath: Path, file_data_by_directory, jinja_env: Environment
+):
+    """Render a Jinja content file, or copy a static asset into the build.
+
+    JPEG and PNG assets additionally get their srcset variants generated.
+    """
+    if ".jinja" in filepath.suffixes:
+        # Jinja files are rendered rather than copied verbatim
+        process_jinja_file(filepath, jinja_env, file_data_by_directory)
+        return
+
+    relative_filepath = filepath.relative_to(get_content_directory())
+    mimetype, _ = mimetypes.guess_type(filepath, strict=False)
+    destination = get_build_directory() / relative_filepath
+
+    # An asset already present in the build directory is left untouched
+    if not destination.exists():
+        destination.parent.mkdir(exist_ok=True, parents=True)
+        shutil.copyfile(filepath, destination)
+
+    srcset_mimetypes = (mimetypes.types_map[".jpg"], mimetypes.types_map[".png"])
+    if mimetype in srcset_mimetypes:
+        asyncio.run(generate_images_for_srcset(filepath))
+
+
+def process_jinja_file(filepath: Path, jinja_env: Environment, file_data_by_directory):
+    """Render a Jinja content file and write it to the build directory.
+
+    The ".jinja" suffix is dropped from the output name; the output's parent
+    directory is created first so the write works when it does not exist yet.
+    """
+    build_directory = get_build_directory()
+    content_directory = get_content_directory()
+    relative_filepath = filepath.relative_to(content_directory)
+    template = jinja_env.get_template(str(relative_filepath))
+    # Templates receive deep copies, with the Path keys converted to str
+    directory_data = deepcopy(file_data_by_directory)
+    context = {"settings": deepcopy(SETTINGS), "datetime": datetime}
+    context["file_data_by_directory"] = {str(k): v for k, v in directory_data.items()}
+    output_name = relative_filepath.with_suffix(filepath.suffix.replace(".jinja", ""))
+    filepath_in_build = build_directory / output_name
+    filepath_in_build.parent.mkdir(exist_ok=True, parents=True)
+    html = template.render(dataclasses=dataclasses, **context)
+    filepath_in_build.write_text(html)
+
+
+def json_converter_with_dates(item: Any) -> None | str:
+    """json.dumps ``default`` hook: datetime -> "YYYY-MM-DD"; other values -> None."""
+    return item.strftime("%Y-%m-%d") if isinstance(item, datetime) else None
+
+
+def write_html_file(
+    filepath: Path,
+    file_data_by_directory: dict[Path, list[MarkdownFileData]],
+    release: bool,
+    jinja_env: Environment,
+):
+    """Render one Markdown file's data through its template and write the HTML.
+
+    The template is named by the front matter's "@type" value plus the
+    configured template extension.
+
+    Raises:
+        ValueError: If the front matter's "@type" value is missing or empty.
+    """
+    directory_pages = file_data_by_directory[filepath.parent]
+    extra_context: TemplateContext = {}
+    if filepath.name == "index.md":
+        # Index pages also receive the other pages of their directory
+        extra_context["sibling_pages"] = [
+            {"url": content_path_to_url(page.path), **page.front_matter}
+            for page in directory_pages
+            if page.path != filepath
+        ]
+    folder_in_build = convert_content_path_to_directory_in_build(filepath)
+
+    file_data = [page for page in directory_pages if page.path == filepath][0]
+    front_matter = file_data.front_matter
+    schema_type = front_matter.get("@type")
+    if not schema_type:
+        raise ValueError(
+            f"Required @type value missing in file or TOML front matter invalid: "
+            f"{filepath}"
+        )
+    template_extension = SETTINGS["MARKDOWN_FILE_JINJA_TEMPLATE_EXTENSION"]
+    template = jinja_env.get_template(f"{schema_type}{template_extension}")
+
+    # A custom template name is swapped for the Schema.org type it maps to
+    if mapped_schema_type := SETTINGS["TEMPLATE_SCHEMA_TYPES"].get(schema_type):
+        front_matter["@type"] = mapped_schema_type
+
+    # Prefixed front matter keys become top-level template variables (prefix
+    # stripped); every other key is treated as schema data
+    prefix = SETTINGS["FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX"]
+    schema_variables: TemplateContext = {}
+    non_schema_variables: TemplateContext = {}
+    for key, value in front_matter.items():
+        if key.startswith(prefix):
+            non_schema_variables[key.replace(prefix, "", 1)] = value
+        else:
+            schema_variables[key] = value
+
+    schema_data = json.dumps(
+        format_schema_data(schema_variables), default=json_converter_with_dates
+    )
+    validate_front_matter_as_schema(filepath, front_matter, warning_console)
+
+    template_context = {
+        "body": file_data.body,
+        "filepath": filepath,
+        "schema_data": schema_data,
+        "schema_type_tag": (
+            f'<script type="application/ld+json">{schema_data}</script>'
+        ),
+        "open_graph_tags": open_graph_meta_tags(front_matter),
+        "build_path": folder_in_build,
+        "file_data_by_directory": {
+            str(path): data for path, data in deepcopy(file_data_by_directory).items()
+        },
+        "settings": deepcopy(SETTINGS),
+        **schema_variables,
+        **deepcopy(extra_context),
+        **non_schema_variables,
+    }
+
+    html = template.render(dataclasses=dataclasses, **template_context)
+    for html_plugin in discovered_html_plugins:
+        try:
+            html = html_plugin.load()(html, template_context, release)
+        except Exception as err:
+            print(f"Error initializing plugin {html_plugin}: {err}")
+
+    # Write the rendered page into its folder in the build directory
+    write_index_file_creating_path(folder_in_build, html)
diff --git a/blurry/gather_file_data_by_directory.py b/blurry/gather_file_data_by_directory.py
new file mode 100644
index 0000000..845e983
--- /dev/null
+++ b/blurry/gather_file_data_by_directory.py
@@ -0,0 +1,60 @@
+import concurrent.futures
+from pathlib import Path
+
+from blurry.markdown import convert_markdown_file_to_html
+from blurry.settings import get_content_directory
+from blurry.types import DirectoryFileData
+from blurry.types import MarkdownFileData
+
+
+def sort_directory_file_data_by_date(
+    directory_file_data: DirectoryFileData,
+) -> DirectoryFileData:
+    """Sort each directory's pages newest-first, in place, and return the mapping."""
+
+    def sort_key(page) -> str:
+        # datePublished wins over dateCreated; undated pages sort last
+        primary = str(page.front_matter.get("datePublished", ""))
+        return primary or str(page.front_matter.get("dateCreated", "")) or "0000-00-00"
+
+    for pages in directory_file_data.values():
+        pages.sort(key=sort_key, reverse=True)
+    return directory_file_data
+
+
+def gather_file_data_by_directory() -> DirectoryFileData:
+    """Convert every Markdown file under the content directory, grouped by folder.
+
+    Files are converted in a process pool. A file whose conversion raises is
+    reported and left out of the result instead of aborting the whole run.
+
+    Returns:
+        A mapping from each directory (relative to the content directory) to
+        its files' data, sorted by sort_directory_file_data_by_date.
+    """
+    file_data_by_directory: DirectoryFileData = {}
+    content_directory = get_content_directory()
+
+    markdown_future_to_path: dict[concurrent.futures.Future, Path] = {}
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        for filepath in content_directory.rglob("*.md"):
+            # Paths are stored relative to the content directory
+            future = executor.submit(convert_markdown_file_to_html, filepath)
+            markdown_future_to_path[future] = filepath.relative_to(content_directory)
+
+        for future in concurrent.futures.as_completed(markdown_future_to_path):
+            relative_filepath = markdown_future_to_path[future]
+            # Check for failure before result(), which would re-raise it
+            if exception := future.exception():
+                print(
+                    f"{relative_filepath}: Could not convert file to HTML - {exception}"
+                )
+                continue
+            body, front_matter = future.result()
+            file_data = MarkdownFileData(
+                body=body, front_matter=front_matter, path=relative_filepath
+            )
+            parent_directory = relative_filepath.parent
+            file_data_by_directory.setdefault(parent_directory, []).append(file_data)
+
+    return sort_directory_file_data_by_date(file_data_by_directory)
diff --git a/blurry/runserver_handlers.py b/blurry/runserver_handlers.py
new file mode 100644
index 0000000..d7df130
--- /dev/null
+++ b/blurry/runserver_handlers.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+
+from jinja2 import Environment
+
+from blurry.file_processors import process_jinja_file
+from blurry.file_processors import write_html_file
+from blurry.gather_file_data_by_directory import gather_file_data_by_directory
+from blurry.settings import get_content_directory
+
+
+def handle_changed_jinja_files(filepaths: list[str], jinja_env: Environment):
+    """Re-render each changed Jinja file against freshly gathered Markdown data.
+
+    Paths in ``filepaths`` are resolved against the current working directory.
+    """
+    file_data_by_directory = gather_file_data_by_directory()
+    for path in filepaths:
+        process_jinja_file(Path.cwd() / path, jinja_env, file_data_by_directory)
+
+
+def handle_changed_markdown_files(filepaths: list[str], jinja_env: Environment):
+    """Rewrite the HTML of each changed Markdown file as a non-release build."""
+    file_data_by_directory = gather_file_data_by_directory()
+    content_directory = get_content_directory()
+    for path in filepaths:
+        # Paths are anchored at the cwd, then made relative to the content directory
+        markdown_path = (Path.cwd() / path).relative_to(content_directory)
+        write_html_file(
+            markdown_path, file_data_by_directory, release=False, jinja_env=jinja_env
+        )
+
+
+def rebuild_markdown_files(jinja_env: Environment):
+    """Rewrite the HTML of every Markdown file under the content directory."""
+    markdown_files = list(map(str, get_content_directory().rglob("*.md")))
+    # Reuse the changed-file handler, treating every Markdown file as changed
+    handle_changed_markdown_files(markdown_files, jinja_env)
diff --git a/blurry/utils.py b/blurry/utils.py
index 5d4bebf..736211b 100644
--- a/blurry/utils.py
+++ b/blurry/utils.py
@@ -3,7 +3,6 @@
 from blurry.settings import get_build_directory
 from blurry.settings import get_content_directory
 from blurry.settings import SETTINGS
-from blurry.types import DirectoryFileData
 
 
 def get_domain_with_scheme():
@@ -95,21 +94,6 @@ def build_path_to_url(path: Path) -> str:
     return f"{get_domain_with_scheme()}{pathname}"
 
 
-def sort_directory_file_data_by_date(
-    directory_file_data: DirectoryFileData,
-) -> DirectoryFileData:
-    for path, file_data in directory_file_data.items():
-        file_data.sort(
-            key=lambda page: str(page.front_matter.get("datePublished", ""))
-            or str(page.front_matter.get("dateCreated", ""))
-            or "0000-00-00",
-            reverse=True,
-        )
-        directory_file_data[path] = file_data
-
-    return directory_file_data
-
-
 def format_schema_data(schema_data: dict) -> dict:
     formatted_schema_data = {"@context": "https://schema.org"}
     formatted_schema_data.update(schema_data)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c475ad6..2cbbc07 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from blurry.gather_file_data_by_directory import sort_directory_file_data_by_date
 from blurry.settings import get_build_directory
 from blurry.settings import get_content_directory
 from blurry.settings import SETTINGS
@@ -14,7 +15,6 @@
 from blurry.utils import format_schema_data
 from blurry.utils import get_domain_with_scheme
 from blurry.utils import path_to_url_pathname
-from blurry.utils import sort_directory_file_data_by_date
 
 
 BUILD_DIR = get_build_directory()