Skip to content

Commit

Permalink
Add objects.inv (#522)
Browse files Browse the repository at this point in the history
- Added `objectsInv.ts` to read, decompress, and parse `objects.inv`
files (and vice versa)
- Updated `updateApiDocs.ts` to copy the file over from the artifact
- Added logic to transform links and delete any references that don't
exist in our docs

### Explanation

The `objects.inv` file is an index that
[intersphinx](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html)
uses to create cross-references to our documentation. To allow other
projects to cross-reference Qiskit, we need to host this file.

For a description of the file's data structure, see the [sphobjinv
documentation](https://sphobjinv.readthedocs.io/en/stable/syntax.html),
whose implementation I copied when working out how to transform this
file.

---------

Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com>
  • Loading branch information
frankharkins and Eric-Arellano authored Jan 17, 2024
1 parent b2e6d10 commit e09cfcf
Show file tree
Hide file tree
Showing 13 changed files with 506 additions and 9 deletions.
Binary file added public/api/qiskit-ibm-provider/objects.inv
Binary file not shown.
Binary file added public/api/qiskit-ibm-runtime/objects.inv
Binary file not shown.
Binary file added public/api/qiskit/objects.inv
Binary file not shown.
11 changes: 9 additions & 2 deletions scripts/commands/checkLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@ async function determineCurrentDocsFileBatch(
): Promise<FileBatch> {
const toCheck = [
"docs/**/*.{ipynb,md,mdx}",
"public/api/*/objects.inv",
// Ignore historical versions
"!docs/api/{qiskit,qiskit-ibm-provider,qiskit-ibm-runtime}/[0-9]*/*",
"!public/api/{qiskit,qiskit-ibm-provider,qiskit-ibm-runtime}/[0-9]*/*",
];
const toLoad = [
"docs/api/qiskit/0.44/{algorithms,opflow}.md",
Expand All @@ -127,7 +129,9 @@ async function determineCurrentDocsFileBatch(
];

if (!args.currentApis) {
toCheck.push("!docs/api/{qiskit,qiskit-ibm-provider,qiskit-ibm-runtime}/*");
toCheck.push(
"!{public,docs}/api/{qiskit,qiskit-ibm-provider,qiskit-ibm-runtime}/*",
);
toLoad.push("docs/api/{qiskit,qiskit-ibm-provider,qiskit-ibm-runtime}/*");
}

Expand Down Expand Up @@ -161,7 +165,10 @@ async function determineHistoricalFileBatches(
const result = [];
for (const folder of historicalFolders) {
const fileBatch = await FileBatch.fromGlobs(
[`docs/api/${projectName}/${folder.name}/*`],
[
`docs/api/${projectName}/${folder.name}/*`,
`public/api/${projectName}/${folder.name}/objects.inv`,
],
toLoad,
`${projectName} v${folder.name}`,
);
Expand Down
5 changes: 4 additions & 1 deletion scripts/commands/updateApiDocs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import transformLinks from "transform-markdown-links";

import { ObjectsInv } from "../lib/api/objectsInv";
import { sphinxHtmlToMarkdown } from "../lib/api/htmlToMd";
import { saveImages } from "../lib/api/saveImages";
import { generateToc } from "../lib/api/generateToc";
Expand Down Expand Up @@ -188,6 +189,7 @@ async function convertHtmlToMarkdown(
baseGitHubUrl: string,
pkg: Pkg,
) {
const objectsInv = await ObjectsInv.fromFile(htmlPath);
const files = await globby(
[
"apidocs/**.html",
Expand Down Expand Up @@ -245,10 +247,11 @@ async function convertHtmlToMarkdown(
results = await mergeClassMembers(results);
flattenFolders(results);
specialCaseResults(results);
await updateLinks(results, pkg.transformLink);
await updateLinks(results, objectsInv, pkg.transformLink);
await dedupeHtmlIdsFromResults(results);
addFrontMatter(results, pkg);

await objectsInv.write(getPkgRoot(pkg, "public"));
for (const result of results) {
let path = urlToPath(result.url);

Expand Down
127 changes: 127 additions & 0 deletions scripts/lib/api/objectsInv.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2023.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { describe, expect, test } from "@jest/globals";
import { ObjectsInv, ObjectsInvEntry } from "./objectsInv";
import { unlink, stat } from "fs/promises";

// Folder holding the checked-in objects.inv fixture.
const TEST_FOLDER = "scripts/lib/api/testdata/";
// Scratch folder the round-trip test writes into; cleaned up in afterAll.
const TEMP_FOLDER = "scripts/lib/api/testdata/temp/";

describe("objects.inv", () => {
  test("read file and decompress", async () => {
    const objectsInv = await ObjectsInv.fromFile(TEST_FOLDER);

    expect(objectsInv.preamble).toMatch(
      "# Sphinx inventory version 2\n" +
        "# Project: Qiskit\n" +
        "# Version: 0.45\n" +
        "# The remainder of this file is compressed using zlib.\n",
    );

    const uriIndices = [10, 88, 107, 1419, 23575];
    // This test fails when you include / exclude entries, which shifts some array indices.
    // Use the following code to find the new indices.
    // console.log(objectsInv.entries.findLastIndex( e => { return e.uri.includes("index") }))
    expect(uriIndices.map((i) => objectsInv.entries[i].uri))
      .toMatchInlineSnapshot(`
      [
        "stubs/qiskit.algorithms.AlgorithmJob.html#qiskit.algorithms.AlgorithmJob.job_id",
        "stubs/qiskit.algorithms.FasterAmplitudeEstimation.html#qiskit.algorithms.FasterAmplitudeEstimation.sampler",
        "stubs/qiskit.algorithms.Grover.html#qiskit.algorithms.Grover.quantum_instance",
        "apidoc/assembler.html#qiskit.assembler.disassemble",
        "index.html",
      ]
    `);
    const nameIndices = [23575, 24146];
    expect(nameIndices.map((i) => objectsInv.entries[i].dispname))
      .toMatchInlineSnapshot(`
      [
        "Qiskit 0.45 documentation",
        "FakeOslo",
      ]
    `);
  });

  test("write file and re-read matches original", async () => {
    const originalObjectsInv = await ObjectsInv.fromFile(TEST_FOLDER);
    await originalObjectsInv.write(TEMP_FOLDER);

    const newObjectsInv = await ObjectsInv.fromFile(TEMP_FOLDER);
    expect(newObjectsInv.entries.length).toEqual(
      originalObjectsInv.entries.length,
    );
    // `toMatch` with a string argument only checks for a substring, so use
    // strict equality to prove the round trip is lossless.
    expect(newObjectsInv.preamble).toEqual(originalObjectsInv.preamble);
    expect(newObjectsInv.entriesString()).toEqual(
      originalObjectsInv.entriesString(),
    );
  });

  test("URI transform works correctly", () => {
    const preamble = `# Simple preamble\n`;
    // Use nonsense transform function to check things are actually changing
    const transformFunction = (x: string) => x.replaceAll("i", "a");
    const entries: ObjectsInvEntry[] = [
      {
        name: "qiskit_ibm_runtime.RuntimeJob.job_id",
        domainAndRole: "py:method",
        priority: "1",
        uri: "qiskit_ibm_runtime.RuntimeJob#qiskit_ibm_runtime.RuntimeJob.job_id",
        dispname: "-",
      },
      {
        name: "stubs/qiskit_ibm_provider.transpiler.passes.scheduling.ASAPScheduleAnalysis.__call__",
        domainAndRole: "std:doc",
        priority: "-1",
        uri: "stubs/qiskit_ibm_provider.transpiler.passes.scheduling.ASAPScheduleAnalysis.__call__.html",
        dispname: "ASAPScheduleAnalysis.__call__",
      },
      {
        name: "search",
        domainAndRole: "std:label",
        priority: "-1",
        uri: "search.html",
        dispname: "Search Page",
      },
      {
        name: "release notes_ignis_0.5.0",
        domainAndRole: "std:label",
        priority: "-1",
        uri: "legacy_release_notes.html#release-notes-ignis-0-5-0",
        dispname: "Ignis 0.5.0",
      },
      {
        name: "index",
        domainAndRole: "std:doc",
        priority: "-1",
        uri: "index.html",
        dispname: "Qiskit IBM Quantum Provider API docs preview",
      },
    ];

    const objectsInv = new ObjectsInv(preamble, entries);
    objectsInv.updateUris(transformFunction);
    expect(objectsInv.entries.map((i) => i.uri)).toEqual([
      "qaskat_abm_runtame.RuntameJob#qaskat_abm_runtame.RuntameJob.job_ad",
      "stubs/qaskat_abm_provader.transpaler.passes.schedulang.ASAPScheduleAnalysas.__call__",
      "search",
      "legacy_release_notes#release-notes-agnas-0-5-0",
      "andex",
    ]);
  });

  afterAll(async () => {
    // Remove the scratch file written by the round-trip test. If that test
    // failed before writing, the file is absent; tolerate ENOENT so cleanup
    // does not raise a second, misleading error. Anything else is rethrown.
    try {
      await unlink(TEMP_FOLDER + "objects.inv");
    } catch (error: unknown) {
      const code = (error as { code?: unknown } | null)?.code;
      if (code !== "ENOENT") {
        throw error;
      }
    }
  });
});
171 changes: 171 additions & 0 deletions scripts/lib/api/objectsInv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { readFile, writeFile } from "fs/promises";
import { unzipSync, deflateSync } from "zlib";
import { removeSuffix } from "../stringUtils";
import { join, dirname } from "path";
import { mkdirp } from "mkdirp";

/**
 * URI patterns for pages that Sphinx generates but that have no counterpart
 * in our docs. An entry whose URI matches any of these is dropped.
 *
 * The first three patterns match the whole URI only; the remaining ones also
 * match when the page name is followed by `#` (and, for the first four of
 * those, by `/`).
 **/
const ENTRIES_TO_EXCLUDE = [
  /^genindex(\.html)?$/,
  /^py-modindex(\.html)?$/,
  /^search(\.html)?$/,
  /^explanation(\.html)?(?=\/|#|$)/,
  /^how_to(\.html)?(?=\/|#|$)/,
  /^tutorials(\.html)?(?=\/|#|$)/,
  /^migration_guides(\.html)?(?=\/|#|$)/,
  /^configuration(\.html)?(?=#|$)/,
  /^contributing_to_qiskit(\.html)?(?=#|$)/,
  /^deprecation_policy(\.html)?(?=#|$)/,
  /^faq(\.html)?(?=#|$)/,
  /^getting_started(\.html)?(?=#|$)/,
  /^intro_tutorial1(\.html)?(?=#|$)/,
  /^maintainers_guide(\.html)?(?=#|$)/,
  /^qc_intro(\.html)?(?=#|$)/,
  /^release[-_]notes(\.html)?(?=#|$)/,
  /^legacy_release_notes(\.html)?(?=#|$)/,
];

/**
 * Report whether `uri` matches one of the ENTRIES_TO_EXCLUDE patterns.
 */
function shouldExcludePage(uri: string): boolean {
  for (const pattern of ENTRIES_TO_EXCLUDE) {
    if (pattern.test(uri)) {
      return true;
    }
  }
  return false;
}

/**
 * One parsed line of an objects.inv inventory. Each field holds the text of
 * the corresponding whitespace-separated field of the line, in order.
 * For the syntax, see https://sphobjinv.readthedocs.io/en/stable/syntax.html
 */
export type ObjectsInvEntry = {
  // First field: the object's name, e.g. "qiskit_ibm_runtime.RuntimeJob.job_id"
  name: string;
  // Second field, e.g. "py:method", "std:doc" or "std:label"
  domainAndRole: string;
  // Third field: an optionally negative integer, kept as a string
  priority: string;
  // Fourth field: page path, optionally followed by "#anchor"
  uri: string;
  // Fifth field: display name; can be "-" (presumably shorthand for "same as
  // name" — confirm against the syntax reference)
  dispname: string;
};

/**
 * Class to hold and operate on data from Sphinx's objects.inv file.
 *
 * The file consists of a plain-text preamble followed by a zlib-compressed
 * body with one entry per line. For information on the syntax, see:
 * https://sphobjinv.readthedocs.io/en/stable/syntax.html
 */
export class ObjectsInv {
  /** The uncompressed header lines of the file, stored verbatim. */
  readonly preamble: string;
  /** Parsed entries. `fromFile` stores each `uri` in expanded form. */
  entries: ObjectsInvEntry[];

  constructor(preamble: string, entries: ObjectsInvEntry[]) {
    this.preamble = preamble;
    this.entries = entries;
  }

  /**
   * Read `<directoryPath>/objects.inv`, decompress it, and parse its entries.
   * This function follows the process from:
   * https://github.com/bskinn/sphobjinv/blob/stable/src/sphobjinv/zlib.py
   *
   * Lines that cannot be parsed are reported with `console.warn` and skipped.
   * Entries whose URI matches `shouldExcludePage` are dropped.
   *
   * @param directoryPath Folder containing the `objects.inv` file.
   * @returns The parsed inventory.
   */
  static async fromFile(directoryPath: string): Promise<ObjectsInv> {
    const path = join(directoryPath, "objects.inv");
    let buffer = await readFile(path);
    // Extract preamble (first 4 lines of file); 10 is the byte value of "\n"
    let preamble = "";
    for (let line = 0; line < 4; line++) {
      const nextNewline = buffer.indexOf(10) + 1;
      preamble += buffer.toString("utf8", 0, nextNewline);
      buffer = buffer.subarray(nextNewline);
    }

    // Decompress the rest
    const lines = unzipSync(buffer).toString("utf8").trim().split("\n");

    // Sort the strings into their parts
    const entries: ObjectsInvEntry[] = [];
    for (const line of lines) {
      // A blank line carries no entry (e.g. an inventory with an empty body),
      // so skip it rather than reporting a parse error.
      if (line.trim() === "") {
        continue;
      }
      // Regex from sphinx source
      // https://github.com/sphinx-doc/sphinx/blob/2f60b44999d7e610d932529784f082fc1c6af989/sphinx/util/inventory.py#L115-L116
      const parts = line.match(/(.+?)\s+(\S+)\s+(-?\d+)\s+?(\S*)\s+(.*)/);
      if (parts == null || parts.length !== 6) {
        console.warn(`Error parsing line of objects.inv: ${line}`);
        continue;
      }
      const entry: ObjectsInvEntry = {
        name: parts[1],
        domainAndRole: parts[2],
        priority: parts[3],
        uri: ObjectsInv.#expandUri(parts[4], parts[1]),
        dispname: parts[5],
      };
      if (shouldExcludePage(entry.uri)) {
        continue;
      }

      entries.push(entry);
    }

    return new ObjectsInv(preamble, entries);
  }

  /**
   * Replace a trailing `$` in an anchored URI with the entry's name.
   */
  static #expandUri(uri: string, name: string): string {
    if (uri.includes("#") && uri.endsWith("$")) {
      // #$ is a shorthand for "anchor==name"; see "For illustration" in
      // https://sphobjinv.readthedocs.io/en/stable/syntax.html
      uri = removeSuffix(uri, "$") + name;
    }
    return uri;
  }

  /**
   * Inverse of `#expandUri`: when an anchored URI ends with the entry's
   * name, replace that trailing name with the `$` shorthand.
   */
  static #compressUri(uri: string, name: string): string {
    if (uri.includes("#") && uri.endsWith(name)) {
      uri = removeSuffix(uri, name) + "$";
    }
    return uri;
  }

  /**
   * Rewrite every entry's URI in place: drop the `.html` extension of the
   * page, then pass the result through `transformLink`.
   *
   * @param transformLink Called with each extension-less URI; its return
   *   value becomes the entry's new URI.
   */
  updateUris(transformLink: Function): void {
    for (const entry of this.entries) {
      // Only strip `.html` where it ends the page path (right before the
      // `#anchor` or the end of the URI). An unanchored match would remove
      // the first `.html` found anywhere, corrupting a path such as
      // `stubs/pkg.html_utils.Foo.html`.
      entry.uri = entry.uri.replace(/\.html(?=#|$)/, "");
      entry.uri = transformLink(entry.uri);
    }
  }

  /**
   * Return all entries joined together as a single string
   * to be compressed before writing. Fields are separated by one space,
   * entries by "\n" (no trailing newline), and URIs use the `$` shorthand
   * where it applies.
   */
  entriesString(): string {
    return this.entries
      .map((e) =>
        [
          e.name,
          e.domainAndRole,
          e.priority,
          ObjectsInv.#compressUri(e.uri, e.name),
          e.dispname,
        ].join(" "),
      )
      .join("\n");
  }

  /**
   * Compress and write to `<directoryPath>/objects.inv`, creating the
   * folder if needed.
   *
   * @param directoryPath Folder to write the `objects.inv` file into.
   */
  async write(directoryPath: string): Promise<void> {
    const path = join(directoryPath, "objects.inv");
    const preamble = Buffer.from(this.preamble);
    // Terminate the last entry with a newline: Sphinx's line-based inventory
    // reader only yields newline-terminated lines, so an unterminated final
    // entry would be silently dropped by intersphinx consumers.
    const body = this.entriesString();
    const payload = body === "" ? body : body + "\n";
    const compressed = deflateSync(Buffer.from(payload, "utf8"), {
      level: 9,
    });
    await mkdirp(dirname(path));
    await writeFile(path, Buffer.concat([preamble, compressed]));
  }
}
1 change: 0 additions & 1 deletion scripts/lib/api/specialCaseResults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ export function specialCaseResults(results: HtmlToMdResultWithUrl[]): void {
hardcodedFrontmatter: RUNTIME_INDEX_META,
};
}

result.url = transformSpecialCaseUrl(result.url);
}
}
Binary file added scripts/lib/api/testdata/objects.inv
Binary file not shown.
Loading

0 comments on commit e09cfcf

Please sign in to comment.