docs: link to Doxygen output instead of using breathe

Fixes apache#1935.
lidavidm · Sep 27, 2024 · 230d401 · 230d401
1 parent d204b82
commit 230d401
Show file tree

Hide file tree

Showing 7 changed files with 288 additions and 64 deletions.
diff --git a/ci/conda_env_docs.txt b/ci/conda_env_docs.txt
@@ -15,17 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
-breathe
 doxygen
-# XXX(https://github.com/apache/arrow-adbc/issues/987)
-furo>=2023.09.10
+furo
 make
 # Needed to install mermaid
 nodejs
 numpydoc
 pytest
-# XXX: we're stuck until we can get rid of Breathe
-sphinx=6.*
+# XXX: furo on conda-forge says it isn't compatible with sphinx 8, but in
+# reality it is (the conda metadata for one of its dependencies is wrong)
+# https://github.com/conda-forge/sphinx-basic-ng-feedstock/pull/11
+sphinx >=7
 sphinx-autobuild
 sphinx-copybutton
 sphinx-design

diff --git a/ci/scripts/docs_build.sh b/ci/scripts/docs_build.sh
@@ -40,6 +40,7 @@ main() {
  # We need to determine the base URL without knowing it...
  # Inject a dummy URL here, and fix it up in website_build.sh
  export ADBC_INTERSPHINX_MAPPING_java_adbc="http://javadocs.home.arpa/;$source_dir/java/target/site/apidocs/objects.inv"
+ export ADBC_INTERSPHINX_MAPPING_cpp_adbc="http://doxygen.home.arpa/;$source_dir/"
 
  make html
  rm -rf "$source_dir/docs/build/html/java/api"

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -43,7 +43,6 @@
  "adbc_cookbook",
  # generic directives to enable intersphinx for java
  "adbc_java_domain",
- "breathe",
  "numpydoc",
  "sphinx.ext.autodoc",
  "sphinx.ext.doctest",
@@ -78,13 +77,6 @@
  "show-inheritance": True,
 }
 
-# -- Options for Breathe -----------------------------------------------------
-
-breathe_default_project = "adbc"
-breathe_projects = {
- "adbc": "../../c/apidoc/xml/",
-}
-
 # -- Options for doctest -----------------------------------------------------
 
 doctest_global_setup = """
@@ -132,10 +124,6 @@ def _find_intersphinx_mappings():
  url, _, path = val.partition(";")
  print("[ADBC] Found Intersphinx mapping", name)
  intersphinx_mapping[name] = (url, path)
- # "adbc_java": (
- # "http://localhost:8000/",
- # "/home/lidavidm/Code/arrow-adbc/java/target/site/apidocs/objects.inv",
- # ),
 
 
 _find_intersphinx_mappings()

diff --git a/docs/source/cpp/driver_manager.rst b/docs/source/cpp/driver_manager.rst
@@ -94,13 +94,12 @@ Then they can be used via CMake, e.g.:
 Usage
 =====
 
-To create a database, use the :cpp:class:`AdbcDatabase` API as usual,
-but during initialization, provide two additional parameters in
-addition to the driver-specific connection parameters: ``driver`` and
-(optionally) ``entrypoint``. ``driver`` must be the name of a library
-to load, or the path to a library to load. ``entrypoint``, if
-provided, should be the name of the symbol that serves as the ADBC
-entrypoint (see :cpp:type:`AdbcDriverInitFunc`).
+To create a database, use the :c:struct:`AdbcDatabase` API as usual, but
+during initialization, provide two parameters in addition to the
+driver-specific connection parameters: ``driver`` and (optionally)
+``entrypoint``. ``driver`` must be the name of a library to load, or the path
+to a library to load. ``entrypoint``, if provided, should be the name of the
+symbol that serves as the ADBC entrypoint (see :c:type:`AdbcDriverInitFunc`).
 
 .. code-block:: c
 

diff --git a/docs/source/ext/doxygen_inventory.py b/docs/source/ext/doxygen_inventory.py
@@ -0,0 +1,199 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Generate a Sphinx inventory for a Doxygen site.
+
+Requires the XML and HTML output.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import typing
+import urllib.parse
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+import sphinx.util.inventory
+from fake_inventory import (
+ FakeBuildEnvironment,
+ FakeBuilder,
+ FakeDomain,
+ FakeEnv,
+ FakeObject,
+)
+
+
def parse_member_id(member_id: str) -> str:
    """Extract the HTML anchor from a Doxygen XML member ID.

    The anchor is the text after the last underscore with its first
    character dropped, e.g.
    ``group__adbc-statement_1gab81e98bf3871f9b7ce1445fad39eddbd`` becomes
    ``gab81e98bf3871f9b7ce1445fad39eddbd``.

    Parameters
    ----------
    member_id : str
        The ``id`` attribute of a ``<memberdef>`` element.

    Returns
    -------
    str
        The anchor, without a leading ``#``.

    Raises
    ------
    ValueError
        If the extracted anchor is not 33 or 34 characters long.
    """
    # rpartition yields the whole string when there is no underscore, which
    # matches what split("_")[-1] would give.
    anchor = member_id.rpartition("_")[2][1:]
    # Explicit raise instead of ``assert`` so the sanity check still runs
    # under ``python -O``.
    if len(anchor) not in (33, 34):
        raise ValueError(
            f"Unexpected Doxygen member ID {member_id!r}: "
            f"anchor {anchor!r} has length {len(anchor)}, expected 33 or 34"
        )
    return anchor
+
+
def scrape_links(item_id_to_url, root):
    """Parse Doxygen XML files to extract definitions.

    Parameters
    ----------
    item_id_to_url
        Mapping from anchor/item ID to the URL of the page containing it.
    root
        Root element of one parsed Doxygen XML file.

    Yields
    ------
    tuple
        ``(domain, name, typ, anchor, url)`` for each struct and each
        supported member found under ``root``.

    Raises
    ------
    NotImplementedError
        If a ``<compounddef>`` or ``<memberdef>`` has an unsupported kind.
    """
    domain = "c"
    ignored_kinds = {
        # Generated for a directory
        "dir",
        # Seems to be emitted for things referencing std::
        "namespace",
        # README.md etc
        "page",
    }
    # memberdef kind -> (object type, child element holding the name)
    member_kinds = {
        "define": ("define", "name"),
        "function": ("function", "name"),
        "typedef": ("type", "name"),
        "variable": ("member", "qualifiedname"),
    }

    for compounddef in root.findall("compounddef"):
        kind = compounddef.attrib.get("kind")
        if kind in ignored_kinds:
            continue
        if kind not in ("file", "group", "struct"):
            raise NotImplementedError(f'<compounddef kind="{kind}"> not supported')

        if kind == "struct":
            name = compounddef.find("compoundname").text
            anchor = compounddef.attrib["id"]
            # NOTE(review): unlike members below, a struct whose id is
            # missing from the mapping raises KeyError rather than warning.
            url = item_id_to_url[anchor]
            yield (domain, name, "struct", anchor, url)

        for memberdef in compounddef.findall(".//memberdef"):
            member_kind = memberdef.attrib.get("kind")
            if member_kind not in member_kinds:
                # Use the value already fetched with .get() so a missing
                # "kind" attribute reports this error instead of a KeyError.
                raise NotImplementedError(
                    f'<memberdef kind="{member_kind}"> not supported'
                )
            typ, name_tag = member_kinds[member_kind]
            name = memberdef.find(name_tag).text

            anchor = parse_member_id(memberdef.attrib["id"])
            if anchor not in item_id_to_url:
                # TODO: I think this is due to bad Doxygen config
                print(
                    f"WARNING: Could not find URL for :{domain}:{typ}:`{name}` ({anchor})"
                )
                continue
            yield (domain, name, typ, anchor, item_id_to_url[anchor])
+
+
def make_fake_domains(
    html_root: Path, xml_root: Path, base_url: str
) -> dict[str, FakeDomain]:
    """Build fake Sphinx domains for the objects documented by Doxygen.

    Parameters
    ----------
    html_root : Path
        Directory of Doxygen HTML output; searched recursively for
        ``*.html`` files.
    xml_root : Path
        Directory of Doxygen XML output; searched recursively for
        ``*.xml`` files.
    base_url : str
        Base URL that page paths are joined onto. A trailing slash is
        added if missing.

    Returns
    -------
    dict[str, FakeDomain]
        Domains keyed by ``"c"``, ``"cpp"`` and ``"std"``, holding one
        ``FakeObject`` per definition yielded by ``scrape_links``.
    """
    if not base_url.endswith("/"):
        base_url += "/"

    # Scrape the HTML pages generated by Doxygen for anchors. This gives us a
    # mapping from item hex codes to URLs.
    item_id_to_url = {}
    html_name = re.compile(r'name="([^\"]+)"')
    for page in html_root.rglob("*.html"):
        # as_posix() keeps forward slashes in the URL on every platform.
        page_url = page.relative_to(html_root).as_posix()
        # Doxygen emits UTF-8 HTML; don't depend on the locale's encoding.
        text = page.read_text(encoding="utf-8")
        for item_id in html_name.findall(text):
            item_id_to_url[item_id] = page_url

    domains = {
        "c": FakeDomain(objects=[]),
        "cpp": FakeDomain(objects=[]),
        "std": FakeDomain(objects=[]),
    }

    # Scrape the XML index generated by Doxygen. We can use the previous
    # index to generate the proper URL.
    for index in xml_root.rglob("*.xml"):
        print("Parsing", index)
        root = ET.parse(index).getroot()

        for domain, name, typ, anchor, rel_url in scrape_links(item_id_to_url, root):
            url = urllib.parse.urljoin(base_url, rel_url)
            domains[domain].objects.append(
                FakeObject(
                    name=name,
                    dispname=name,
                    typ=typ,
                    docname=url,
                    anchor=anchor,
                    prio=1,
                )
            )
            print(f"Linking :{domain}:{typ}:`{name}` to {url}#{anchor}")

    return domains
+
+
def main():
    """Command-line entry point: write ``objects.inv`` for a Doxygen site.

    Parses the command line, builds the fake domains from the Doxygen HTML
    and XML output, and dumps them to ``<output_dir>/objects.inv`` through
    ``sphinx.util.inventory.InventoryFile.dump``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("project", help="Project name")
    parser.add_argument("version", help="Project version")
    # Both paths are consumed unconditionally below, so require them up front
    # instead of failing later with an AttributeError on None.
    parser.add_argument(
        "--html-path",
        type=Path,
        required=True,
        help="Path to the Doxygen generated HTML",
    )
    parser.add_argument(
        "--xml-path",
        type=Path,
        required=True,
        help="Path to the Doxygen generated XML",
    )
    parser.add_argument("url", help="Eventual base URL of the Doxygen docs")
    parser.add_argument(
        "output_dir", type=Path, help="Where to write the new objects.inv"
    )

    args = parser.parse_args()

    domains = make_fake_domains(args.html_path, args.xml_path, args.url)
    config = FakeEnv(project=args.project, version=args.version)
    env = FakeBuildEnvironment(config=config, domains=domains)

    output = args.output_dir / "objects.inv"
    sphinx.util.inventory.InventoryFile.dump(
        str(output),
        env,
        FakeBuilder(),
    )
    print("Wrote", output)


if __name__ == "__main__":
    main()
diff --git a/docs/source/ext/fake_inventory.py b/docs/source/ext/fake_inventory.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Utilities for making fake inventories for non-Sphinx docs.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import typing
+import urllib.parse
+from pathlib import Path
+
+import sphinx.util.inventory
+
+# XXX: we're taking advantage of duck typing to do stupid things here.
+
+
class FakeEnv(typing.NamedTuple):
    """Project metadata record with a ``project`` name and a ``version``."""

    # Project name
    project: str
    # Project version string
    version: str
+
+
class FakeObject(typing.NamedTuple):
    """One entry of a fake inventory.

    An inventory line looks like this::

        name domainname:typ prio uri dispname
    """

    name: str
    # written as '-' if equal to name
    dispname: str
    # member, doc, etc
    typ: str
    # passed through builder.get_target_uri
    docname: str
    # not including the #
    anchor: str
    # written, but never used; an integer priority (callers pass 1)
    prio: int
+
+
class FakeDomain(typing.NamedTuple):
    """Minimal domain stand-in that only holds a list of objects."""

    # Entries belonging to this domain
    objects: list[FakeObject]

    def get_objects(self) -> list[FakeObject]:
        """Return the stored object list itself (not a copy)."""
        return self.objects
+
+
class FakeBuildEnvironment(typing.NamedTuple):
    """Minimal build-environment stand-in: a config plus named domains."""

    # Project metadata
    config: FakeEnv
    # Domain name -> domain
    domains: dict[str, FakeDomain]
+
+
class FakeBuilder:
    """Minimal builder stand-in whose target URI is the docname itself."""

    def get_target_uri(self, docname: str) -> str:
        """Return ``docname`` unchanged."""
        return docname