readthedocs · ericholscher · Nov 23, 2024 · Nov 24, 2024 · Dec 6, 2024 · humitos
@@ -0,0 +1,50 @@
+"""
+Module for the file sections feature.
+
+This feature is used to store the title and path name of each page in the index.
+"""
+
+import json
+import logging
+
+from readthedocs.builds.models import Version
+from readthedocs.filesections.dataclasses import FileSectionManifest
+from readthedocs.projects.constants import MEDIA_TYPE_SECTIONS
+from readthedocs.storage import build_media_storage
+
+SECTION_MANIFEST_FILE_NAME = "sections_manifest.json"
+
+log = logging.getLogger(__name__)
+
+
+def get_section_manifest(version: Version) -> FileSectionManifest | None:
+    storage_path = version.project.get_storage_path(
+        type_=MEDIA_TYPE_SECTIONS,
+        version_slug=version.slug,
+        include_file=False,
+        version_type=version.type,
+    )
+    manifest_path = build_media_storage.join(storage_path, SECTION_MANIFEST_FILE_NAME)
+    try:
+        with build_media_storage.open(manifest_path) as manifest_file:
+            manifest = json.load(manifest_file)
+            log.info(f"Loaded section manifest from {manifest_path}")
+    except FileNotFoundError:
+        log.warning(f"Section manifest not found at {manifest_path}")
+        return None
+
+    return FileSectionManifest.from_dict(manifest)
+
+
+def write_section_manifest(version: Version, manifest: FileSectionManifest):
+    storage_path = version.project.get_storage_path(
+        type_=MEDIA_TYPE_SECTIONS,
+        version_slug=version.slug,
+        include_file=False,
+        version_type=version.type,
+    )
+    manifest_path = build_media_storage.join(storage_path, SECTION_MANIFEST_FILE_NAME)
+    with build_media_storage.open(manifest_path, "w") as f:
+        manifest_dict = manifest.as_dict()
+        log.info(f"Writing section manifest: {manifest_dict}")
+        json.dump(manifest_dict, f)
@@ -0,0 +1,38 @@
+from dataclasses import asdict, dataclass
+
+
+@dataclass(slots=True)
+class FileSection:
+    id: str
+    title: str
+
+
+@dataclass(slots=True)
+class Page:
+    path: str
+    sections: list[FileSection]
+
+
+@dataclass(slots=True)
+class FileSectionManifest:
+    build: int
+    pages: list[Page]
+
+    def __init__(self, build_id: int, pages: list[Page]):
+        self.build = build_id
+        self.pages = pages
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "FileSectionManifest":
+        build_id = data["build"]
+        pages = [
+            Page(
+                path=page["path"],
+                sections=[FileSection(**section) for section in page["sections"]],
+            )
+            for page in data["pages"]
+        ]
+        return cls(build_id, pages)
+
+    def as_dict(self) -> dict:
+        return asdict(self)
@@ -35,6 +35,7 @@
 MEDIA_TYPE_HTMLZIP = "htmlzip"
 MEDIA_TYPE_JSON = "json"
 MEDIA_TYPE_DIFF = "diff"
+MEDIA_TYPE_SECTIONS = "sections"
 DOWNLOADABLE_MEDIA_TYPES = (
     MEDIA_TYPE_PDF,
     MEDIA_TYPE_EPUB,
@@ -47,6 +48,7 @@
     MEDIA_TYPE_HTMLZIP,
     MEDIA_TYPE_JSON,
     MEDIA_TYPE_DIFF,
+    MEDIA_TYPE_SECTIONS,
 )
 
 BUILD_COMMANDS_OUTPUT_PATH = "_readthedocs/"

@@ -4,6 +4,8 @@
 
 from readthedocs.builds.constants import BUILD_STATE_FINISHED, INTERNAL, LATEST
 from readthedocs.builds.models import Build, Version
+from readthedocs.filesections import write_section_manifest
+from readthedocs.filesections.dataclasses import FileSection, FileSectionManifest, Page
 from readthedocs.filetreediff import write_manifest
 from readthedocs.filetreediff.dataclasses import FileTreeDiffFile, FileTreeDiffManifest
 from readthedocs.projects.models import HTMLFile, Project
@@ -142,6 +144,36 @@ def collect(self, sync_id: int):
         write_manifest(self.version, manifest)
 
 
+class FileSectionIndexer(Indexer):
+    def __init__(self, version: Version, build: Build):
+        self.version = version
+        self.build = build
+        self.pages = []
+
+    def process(self, html_file: HTMLFile, sync_id: int):
+        log.debug("Processing file for sections", path=html_file.path)
+        processed_json = html_file.processed_json
+        if processed_json:
+            sections = [
+                FileSection(
+                    id=section["id"],
+                    title=section["title"],
+                )
+                for section in processed_json.get("sections", [])
+            ]
+            self.pages.append(Page(path=html_file.path, sections=sections))
+        log.debug("Finished processing file for sections", path=html_file.path)
+
+    def collect(self, sync_id: int):
+        log.debug("Collecting sections for manifest", build_id=self.build.id)
+        manifest = FileSectionManifest(
+            build_id=self.build.id,
+            pages=self.pages,
+        )
+        write_section_manifest(self.version, manifest)
+        log.debug("Finished collecting sections for manifest", build_id=self.build.id)
+
+
 def _get_indexers(*, version: Version, build: Build, search_index_name=None):
     build_config = build.config or {}
     search_config = build_config.get("search", {})
@@ -186,6 +218,13 @@ def _get_indexers(*, version: Version, build: Build, search_index_name=None):
         version=version,
     )
     indexers.append(index_file_indexer)
+
+    file_section_indexer = FileSectionIndexer(
+        version=version,
+        build=build,
+    )
+    indexers.append(file_section_indexer)
+
     return indexers
 
 

@@ -23,6 +23,7 @@
 from readthedocs.core.resolver import Resolver
 from readthedocs.core.unresolver import UnresolverError, unresolver
 from readthedocs.core.utils.extend import SettingsOverrideObject
+from readthedocs.filesections import get_section_manifest
 from readthedocs.filetreediff import get_diff
 from readthedocs.projects.constants import (
     ADDONS_FLYOUT_SORTING_CALVER,
@@ -531,6 +532,9 @@ def _v1(self, project, version, build, filename, url, request):
                 "filetreediff": {
                     "enabled": False,
                 },
+                "filesections": {
+                    "enabled": False,
+                },
             },
         }
 
@@ -544,6 +548,13 @@ def _v1(self, project, version, build, filename, url, request):
             if response:
                 data["addons"]["filetreediff"].update(response)
 
+            sections_response = self._get_filesections_response(
+                project=project,
+                version=version,
+            )
+            if sections_response:
+                data["addons"]["filesections"].update(sections_response)
+
             # Show the subprojects filter on the parent project and subproject
             # TODO: Remove these queries and try to find a way to get this data
             # from the resolver, which has already done these queries.
@@ -707,6 +718,31 @@ def _get_filetreediff_response(self, *, request, project, version, resolver):
             },
         }
 
+    def _get_filesections_response(self, *, project, version):
+        """
+        Get the file sections response for the given version.
+        """
+        manifest = get_section_manifest(version)
+        if not manifest:
+            return None
+
+        return {
+            "enabled": True,
+            "pages": [
+                {
+                    "path": page.path,
+                    "sections": [
+                        {
+                            "id": section.id,
+                            "title": section.title,
+                        }
+                        for section in page.sections
+                    ],
+                }
+                for page in manifest.pages
+            ],
+        }
+
     def _v2(self, project, version, build, filename, url, user):
         return {
             "api_version": "2",