From ccf68d0b9982a2d321a15578ad6f5893aa979ba0 Mon Sep 17 00:00:00 2001
From: cicdguy <26552821+cicdguy@users.noreply.github.com>
Date: Mon, 4 Nov 2024 09:27:40 -0600
Subject: [PATCH] fix: This is pure XML replacements
---
core.py | 466 ++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 298 insertions(+), 168 deletions(-)
diff --git a/core.py b/core.py
index 16e0744..b2b9c49 100644
--- a/core.py
+++ b/core.py
@@ -1,3 +1,7 @@
+"""
+Multi-version dropdown updater.
+"""
+
import argparse
import os
import re
@@ -7,246 +11,375 @@
from packaging.version import InvalidVersion, Version
-def generate_dropdown_list(directory, pattern, refs_order, base_url):
+def compile_pattern(pattern):
"""
- Generates version drop-down list to be inserted based
- on matching directories in the given directory and refs_order.
+ Compile the given regular expression pattern.
- :param directory: The root directory to search for matching directories.
:param pattern: A regular expression pattern to match directory names.
- :param refs_order: List determining the order of items to appear at the beginning.
- :param base_url: The base URL to be used in the hrefs.
- :return: str, Generated HTML markup.
+ :return: Compiled regex pattern.
"""
+ return re.compile(pattern)
- # Compile the pattern
- regex = re.compile(pattern)
- # Find all matching directories
- matching_dirs = [
+def find_matching_directories(directory, regex):
+ """
+ Find all matching directories in the given directory based on the regex pattern.
+
+ :param directory: The root directory to search for matching directories.
+ :param regex: Compiled regular expression pattern to match directory names.
+ :return: List of matching directories.
+ """
+ return [
d
for d in os.listdir(directory)
if os.path.isdir(os.path.join(directory, d)) and regex.match(d)
]
- # Separate items in refs_order and other items for semantic versioning sorting
+
+def separate_refs(matching_dirs, refs_order):
+ """
+ Separate items in refs_order and other items for semantic versioning sorting.
+
+ :param matching_dirs: List of matching directories.
+ :param refs_order: List determining the order of items to appear at the beginning.
+ :return: Tuple of ordered_refs and remaining_refs.
+ """
ordered_refs = [d for d in refs_order if d in matching_dirs]
remaining_refs = [d for d in matching_dirs if d not in refs_order]
+ return ordered_refs, remaining_refs
+
+
+def sorting_key(ref):
+ """
+ Define a custom sorting key function.
+
+ :param ref: Reference to be sorted.
+ :return: Tuple for sorting.
+ """
+ try:
+ return (0, Version(ref))
+ except InvalidVersion:
+ return (1, ref)
- # Define a custom sorting key function
- def sorting_key(ref):
- try:
- return (0, Version(ref))
- except InvalidVersion:
- return (1, ref)
- # Sort the remaining items using the custom sorting key (semantic versioning first, then alphabetically)
+def sort_remaining_refs(remaining_refs):
+ """
+ Sort the remaining items using the custom sorting key
+ (semantic versioning first, then alphabetically).
+
+ :param remaining_refs: List of remaining references.
+ :return: Sorted list of remaining references.
+ """
remaining_refs.sort(key=sorting_key, reverse=True)
+ return remaining_refs
- # Combine the ordered and remaining items
- ordered_refs.extend(remaining_refs)
- # Generate the full URLs for the directories
- refs_dict = {ref: f"{base_url}{ref}" for ref in ordered_refs}
+def generate_refs_dict(ordered_refs, base_url):
+ """
+ Generate the full URLs for the directories.
+
+ :param ordered_refs: List of ordered references.
+ :param base_url: The base URL to be used in the hrefs.
+ :return: Dictionary of references and their URLs.
+ """
+ return {ref: f"{base_url}{ref}" for ref in ordered_refs}
- # Generate the markup
+
+def generate_markup(ordered_refs, refs_dict):
+ """
+ Generate the HTML markup for the drop-down list.
+
+ :param ordered_refs: List of ordered references.
+ :param refs_dict: Dictionary of references and their URLs.
+ :return: str, Generated HTML markup.
+ """
nav_item = """
- Versions
+ Versions
"
-
return nav_item
-def insert_html_after_last_li(tree, dropdown_list):
+def generate_dropdown_list(directory, pattern, refs_order, base_url):
+ """
+ Generates version drop-down list to be inserted based
+ on matching directories in the given directory and refs_order.
+
+ :param directory: The root directory to search for matching directories.
+ :param pattern: A regular expression pattern to match directory names.
+ :param refs_order: List determining the order of items to appear at the beginning.
+ :param base_url: The base URL to be used in the hrefs.
+ :return: str, Generated HTML markup.
+ """
+ regex = compile_pattern(pattern)
+ matching_dirs = find_matching_directories(directory, regex)
+ ordered_refs, remaining_refs = separate_refs(matching_dirs, refs_order)
+ remaining_refs = sort_remaining_refs(remaining_refs)
+ ordered_refs.extend(remaining_refs)
+ refs_dict = generate_refs_dict(ordered_refs, base_url)
+ return generate_markup(ordered_refs, refs_dict)
+
+
+def find_navbar(tree):
+ """
+ Find the first element in the document that contains the class 'navbar-nav'.
+
+ :param tree: lxml HTML tree object.
+ :return: First element with class 'navbar-nav' or None.
+ """
+ navbar = tree.xpath(
+ "//div[@id='navbar']//ul[contains(@class, 'navbar-nav') and contains(@class, 'me-auto')]"
+ )
+ if not navbar:
+ print(
+ "No element with class 'navbar-nav' found in the document.",
+ file=sys.stderr,
+ )
+ return None
+ return navbar[0]
+
+
+def find_navbar_items(navbar):
+ """
+ Find <li> elements representing items in the navbar.
+
+ :param navbar: The navbar.
+ :return: List of <li> elements.
+ """
+ if navbar or navbar is not None:
+ return navbar.xpath('.//li[contains(@class, "nav-item")]')
+ return []
+
+
+def create_versions_dropdown(dropdown_list):
+ """
+ Create a new element from the drop-down list markup.
+
+ :param dropdown_list: str, HTML markup containing the drop-down list to insert.
+ :return: Custom element or None.
"""
- Inserts the drop-down list after the n-th <li> item in the unordered list.
+ try:
+ return html.fromstring(dropdown_list, parser=etree.HTMLParser())
+ except Exception as e:
+ print(f"Error parsing the drop-down list: {e}", file=sys.stderr)
+ return None
+
+
+def insert_versions_dropdown(tree, dropdown_list):
+ """
+ Inserts the drop-down list into the navbar.
:param tree: lxml HTML tree object.
:param dropdown_list: str, HTML markup containing the drop-down list to insert.
+ :return: bool, True if successful, False otherwise.
"""
+ navbar = find_navbar(tree)
+ if not navbar:
+ return False # No navbar found
+
+ navbar_items = find_navbar_items(navbar)
+ if not navbar_items:
+ return False # No navbar items found
+
+ versions_dropdown = create_versions_dropdown(dropdown_list)
+ if not versions_dropdown:
+ return False # Failed to create dropdown
+
+ # Find all <li> that contain a <div> with aria-labelledby="dropdown-versions"
+ existing_dropdown = navbar.xpath('.//li[div/@aria-labelledby="dropdown-versions"]')
+
+ # If no existing dropdown is found, add the new dropdown to the end of the navbar
+ if not existing_dropdown:
+ new_li = html.Element("div")
+ new_li.append(versions_dropdown) # Append the new dropdown directly
+
+ # Append the new <li> to the last <li> item in the navbar
+ navbar[-1].addnext(new_li)
+ return True
+
+ # Remove duplicates by keeping track of IDs or contents
+ existing_ids = set()
+ for item in existing_dropdown:
+ dropdown_id = item.get("id") # or another identifier if necessary
+ if dropdown_id not in existing_ids:
+ existing_ids.add(dropdown_id)
+ else:
+ # Remove the duplicate
+ item.getparent().remove(item)
+
+ # Replace the first remaining existing dropdown with the new versions_dropdown
+ if existing_dropdown:
+ existing_dropdown[0].getparent().replace(
+ existing_dropdown[0], versions_dropdown
+ )
+
+ return True
- # Find the first <ul> element in the document
- ul_element = tree.xpath("//ul[1]")
- if not ul_element:
- print("No element found in the document.", file=sys.stderr)
- return False
+def read_file(file_path):
+ """
+ Read the content of a file.
+
+ :param file_path: Path to the file.
+ :return: Content of the file or None.
+ """
+ try:
+ with open(file_path, "r", encoding="utf-8") as f:
+ return f.read()
+ except FileNotFoundError:
+ print(f"Error: The file '{file_path}' was not found.", file=sys.stderr)
+ except PermissionError:
+ print(
+ f"Error: Permission denied to read the file '{file_path}'.", file=sys.stderr
+ )
+ except Exception as e:
+ print(
+ f"An unexpected error occurred while reading the file '{file_path}': {e}",
+ file=sys.stderr,
+ )
+ return None
+
+
+def write_file(file_path, content):
+ """
+ Write content to a file.
- # Find <li> elements representing items in the nav-bar.
- li_elements = ul_element[0].xpath('.//li[contains(@class, "nav-item")]')
+ :param file_path: Path to the file.
+ :param content: Content to write.
+ :return: bool, True if successful, False otherwise.
+ """
+ try:
+ with open(file_path, "w", encoding="utf-8") as f:
+ f.write(content)
+ return True
+ except PermissionError:
+ print(
+ f"Error: Permission denied to write to the file '{file_path}'.",
+ file=sys.stderr,
+ )
+ except Exception as e:
+ print(
+ f"An unexpected error occurred while writing to the file '{file_path}': {e}",
+ file=sys.stderr,
+ )
+ return False
+
+
+def process_single_html_file(file_path, dropdown_list):
+ """
+ Process a single HTML file, inserting a dropdown list after the last <li> element.
- if li_elements:
- # Create a new element from the drop-down list markup
- try:
- custom_element = html.fromstring(dropdown_list)
- except Exception as e:
- print(f"Error parsing the drop-down list: {e}", file=sys.stderr)
- return False
+ :param file_path: Path to the HTML file.
+ :param dropdown_list: HTML content for the dropdown list.
+ :return: bool, True if successful, False otherwise.
+ """
+ html_contents = read_file(file_path)
+ if html_contents is None:
+ return False
- # Get the last element on the <ul> list.
- last_li = li_elements[-1]
+ try:
+ tree = html.fromstring(html_contents)
+ except etree.XMLSyntaxError as e:
+ print(f"Error parsing the HTML: {e}", file=sys.stderr)
+ return False
- # Insert the custom element after the n-th element
- last_li.addnext(custom_element)
+ success = insert_versions_dropdown(tree, dropdown_list)
+ if not success:
+ print(f"❌ {file_path}", file=sys.stderr)
+ return False
+ modified_html = etree.tostring(
+ tree, encoding="unicode", pretty_print=True, method="html"
+ )
+ if not write_file(file_path, modified_html):
+ return False
+
+ print(f"✅ {file_path}")
return True
def process_html_files_in_directory(directory, pattern, refs_order, base_url):
- processed_files = set()
+ """
+ Process all HTML files in the given directory,
+ inserting a dropdown list after the last <li> element.
- # Generate the drop-down list
+ :param directory: The root directory to search for HTML files.
+ :param pattern: Regular expression pattern to match directory names.
+ :param refs_order: List determining the order of items to appear at the beginning.
+ :param base_url: Base URL to be used in the hrefs.
+ """
+ processed_files = set()
dropdown_list = generate_dropdown_list(directory, pattern, refs_order, base_url)
- dropdown_regex = re.compile(
- r".*",
- re.DOTALL,
- )
-
- # Find all HTML files in the directory and subdirectories
for root, _, files in os.walk(directory):
for file in files:
if file.endswith(".html"):
file_path = os.path.join(root, file)
-
- # Avoid processing the same file twice
if file_path in processed_files:
continue
- # Read the input HTML file
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- input_html = f.read()
- except FileNotFoundError:
- print(
- f"Error: The file '{file_path}' was not found.", file=sys.stderr
- )
- continue
- except PermissionError:
- print(
- f"Error: Permission denied to read the file '{file_path}'.",
- file=sys.stderr,
- )
- continue
- except Exception as e:
- print(
- f"An unexpected error occurred while reading the file '{file_path}': {e}",
- file=sys.stderr,
- )
- continue
-
- # Remove the content between the specified HTML comments:
- #
- #
- # which in the previous implementation of this action were used to
- # mark the beginning and ending of the version drop-down.
- input_html = dropdown_regex.sub("", input_html)
-
- # Parse the HTML content
- try:
- tree = html.fromstring(input_html)
- except html.XMLSyntaxError as e:
- print(f"Error parsing the HTML: {e}", file=sys.stderr)
- continue
+ if process_single_html_file(file_path, dropdown_list):
+ processed_files.add(file_path)
- # Insert the drop-down list
- success = insert_html_after_last_li(tree, dropdown_list)
- if not success:
- print(f"❌ {file_path}", file=sys.stderr)
- continue
+def update_single_search_json(search_json_path, version, base_url):
+ """
+ Update the URLs in a single search.json file to include the version.
- # Convert the modified part back to string and update the file.
- modified_html = etree.tostring(
- tree, encoding="unicode", pretty_print=True, method="html"
- )
-
- # Write the result to the output file (overwrite the input file)
- try:
- with open(file_path, "w", encoding="utf-8") as f:
- f.write(modified_html)
- except PermissionError:
- print(
- f"Error: Permission denied to write to the file '{file_path}'.",
- file=sys.stderr,
- )
- continue
- except Exception as e:
- print(
- f"An unexpected error occurred while writing to the file '{file_path}': {e}",
- file=sys.stderr,
- )
- continue
+ :param search_json_path: Path to the search.json file.
+ :param version: Version to be included in the URLs.
+ :param base_url: Base URL to be used in the hrefs.
+ :return: bool, True if successful, False otherwise.
+ """
+ file_content = read_file(search_json_path)
+ if file_content is None:
+ return False
- print(f"✅ {file_path}")
+ url_pattern = re.compile(rf"({re.escape(base_url)})(?!{re.escape(version)})")
+ updated_content = url_pattern.sub(f"{base_url}{version}/", file_content)
- # Mark this file as processed
- processed_files.add(file_path)
+ if updated_content != file_content:
+ if write_file(search_json_path, updated_content):
+ print(f"Updated URLs in {search_json_path}")
+ return True
+ print(f"Failed to update URLs in {search_json_path}")
+ return False
+ print(f"No URLs to update in {search_json_path}")
+ return True
def update_search_json_urls(directory, pattern, base_url):
"""
- Looks for directories matching the pattern and updates URLs in `search.json` files.
+ Update the URLs in search.json files within the given directory to include the version.
- :param directory: The root directory to search for matching directories.
- :param pattern: The regular expression pattern to match directory names.
+ :param directory: The root directory to search for search.json files.
+ :param pattern: Regular expression pattern to match directory names.
+ :param base_url: Base URL to be used in the hrefs.
"""
- # Compile the pattern
- regex = re.compile(pattern)
-
- # Find all matching directories
- matching_dirs = [
- d
- for d in os.listdir(directory)
- if os.path.isdir(os.path.join(directory, d)) and regex.match(d)
- ]
+ regex = compile_pattern(pattern)
+ matching_dirs = find_matching_directories(directory, regex)
for current_directory in matching_dirs:
search_json_path = os.path.join(directory, current_directory, "search.json")
-
- # Check if search.json exists in the current directory
if not os.path.isfile(search_json_path):
continue
- # Read and update the search.json file
- try:
- with open(search_json_path, "r", encoding="utf-8") as f:
- file_content = f.read()
-
- version = current_directory
-
- # Regex pattern to match URLs that start with the base URL but don't have the version
- # corresponding to the current directory immediately afterwards.
- url_pattern = re.compile(
- rf"({re.escape(base_url)})(?!{re.escape(version)})"
- )
-
- # Replace the matched URLs with the updated URLs (containing given version).
- updated_content = url_pattern.sub(f"{base_url}{version}/", file_content)
-
- # Write the updated content back to the search.json file if changes were made
- if updated_content != file_content:
- with open(search_json_path, "w", encoding="utf-8") as f:
- f.write(updated_content)
- print(f"Updated URLs in {search_json_path}")
- else:
- print(f"No URLs to update in {search_json_path}")
-
- except Exception as e:
- print(
- f"An unexpected error occurred while processing '{search_json_path}': {e}",
- file=sys.stderr,
- )
+ update_single_search_json(search_json_path, current_directory, base_url)
def main():
+ """Main."""
parser = argparse.ArgumentParser(
- description="Insert the multi-version drop-down list after the n-th
- item in unordered lists in all HTML files within a directory."
+ description="Insert the multi-version drop-down list after the n-th
- "
+ "item in unordered lists in all HTML files within a directory."
)
parser.add_argument(
"directory", help="Path to the directory containing HTML files."
@@ -268,12 +401,9 @@ def main():
args = parser.parse_args()
- # Process all HTML files in the specified directory
process_html_files_in_directory(
args.directory, args.pattern, args.refs_order, args.base_url
)
-
- # Update URLs in search.json files
update_search_json_urls(args.directory, args.pattern, args.base_url)