Skip to content

Commit

Permalink
Split out conversionPipeline.ts from updateApiDocs.ts (#952)
Browse files Browse the repository at this point in the history
This is a refactor to unblock the new integration test in
#715. We need to test the
conversion pipeline, so its code needs to live in `lib` not `commands`.

I renamed the function `convertHtmlToMarkdown` to
`runConversionPipeline` and moved it to `conversionPipeline.ts`. I
didn't want the phrase "html to markdown" because we already
have `htmlToMd.ts`; this pipeline does way more than convert HTML to
markdown, such as also copying image files and inlining methods into
classes.

This PR also refactors the rest of `updateApiDocs.ts` via helper
functions. A follow-up will refactor `conversionPipeline.ts`.
  • Loading branch information
Eric-Arellano authored Mar 4, 2024
1 parent 64e0a23 commit a26a851
Show file tree
Hide file tree
Showing 2 changed files with 218 additions and 191 deletions.
225 changes: 34 additions & 191 deletions scripts/commands/updateApiDocs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,15 @@
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { join, parse, relative } from "path";
import { readFile, writeFile } from "fs/promises";

import { globby } from "globby";
import { uniq, uniqBy } from "lodash";
import { mkdirp } from "mkdirp";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import transformLinks from "transform-markdown-links";

import { ObjectsInv } from "../lib/api/objectsInv";
import { sphinxHtmlToMarkdown } from "../lib/api/htmlToMd";
import { saveImages } from "../lib/api/saveImages";
import { generateToc } from "../lib/api/generateToc";
import { HtmlToMdResult } from "../lib/api/HtmlToMdResult";
import { mergeClassMembers } from "../lib/api/mergeClassMembers";
import flattenFolders from "../lib/api/flattenFolders";
import { updateLinks } from "../lib/api/updateLinks";
import { specialCaseResults } from "../lib/api/specialCaseResults";
import addFrontMatter from "../lib/api/addFrontMatter";
import { dedupeHtmlIdsFromResults } from "../lib/api/dedupeHtmlIds";
import { Pkg } from "../lib/api/Pkg";
import { zxMain } from "../lib/zx";
import { pathExists, getRoot, rmFilesInFolder } from "../lib/fs";
import { downloadSphinxArtifact } from "../lib/api/sphinxArtifacts";
import {
addNewReleaseNotes,
generateReleaseNotesIndex,
updateHistoricalTocFiles,
writeReleaseNoteForVersion,
} from "../lib/api/releaseNotes";
import { runConversionPipeline } from "../lib/api/conversionPipeline";

interface Arguments {
[x: string]: unknown;
Expand Down Expand Up @@ -89,17 +67,37 @@ const readArgs = (): Arguments => {
zxMain(async () => {
const args = readArgs();

// Determine the minor version, e.g. 0.44.0 -> 0.44
const versionMatch = args.version.match(/^(\d+\.\d+)/);
if (versionMatch === null) {
if (args.historical && args.dev) {
throw new Error(
`Invalid --version. Expected the format 0.44.0, but received ${args.version}`,
`${args.package} ${args.version} cannot be historical and dev at the same time. Please remove at least only one of these two arguments: --historical, --dev.`,
);
}

if (args.historical && args.dev) {
const minorVersion = determineMinorVersion(args);
const type = args.historical ? "historical" : args.dev ? "dev" : "latest";
const pkg = await Pkg.fromArgs(
args.package,
args.version,
minorVersion,
type,
);

const sphinxArtifactFolder = await prepareSphinxFolder(pkg, args);
const markdownOutputFolder = await prepareMarkdownOutputFolder(pkg);

console.log(`Run pipeline for ${pkg.name}:${pkg.versionWithoutPatch}`);
await runConversionPipeline(
`${sphinxArtifactFolder}/artifact`,
markdownOutputFolder,
pkg,
);
});

function determineMinorVersion(args: Arguments): string {
const versionMatch = args.version.match(/^(\d+\.\d+)/);
if (versionMatch === null) {
throw new Error(
`${args.package} ${args.version} cannot be historical and dev at the same time. Please remove at least only one of these two arguments: --historical, --dev.`,
`Invalid --version. Expected the format 0.44.0, but received ${args.version}`,
);
}

Expand All @@ -110,15 +108,10 @@ zxMain(async () => {
);
}

const type = args.historical ? "historical" : args.dev ? "dev" : "latest";

const pkg = await Pkg.fromArgs(
args.package,
args.version,
versionMatch[0],
type,
);
return versionMatch[0];
}

async function prepareSphinxFolder(pkg: Pkg, args: Arguments): Promise<string> {
const sphinxArtifactFolder = pkg.sphinxArtifactFolder();
if (
args.skipDownload &&
Expand All @@ -130,7 +123,10 @@ zxMain(async () => {
} else {
await downloadSphinxArtifact(pkg, sphinxArtifactFolder);
}
return sphinxArtifactFolder;
}

async function prepareMarkdownOutputFolder(pkg: Pkg): Promise<string> {
const outputDir = pkg.outputDir(`${getRoot()}/docs`);
if (!pkg.isLatest() && !(await pathExists(outputDir))) {
await mkdirp(outputDir);
Expand All @@ -140,158 +136,5 @@ zxMain(async () => {
);
await rmFilesInFolder(outputDir);
}

console.log(
`Convert sphinx html to markdown for ${pkg.name}:${pkg.versionWithoutPatch}`,
);
await convertHtmlToMarkdown(
`${sphinxArtifactFolder}/artifact`,
outputDir,
pkg,
);
});

/**
 * Converts the Sphinx HTML pages found under `htmlPath` into markdown and
 * writes the generated files for `pkg`.
 *
 * In addition to the per-page markdown files, this writes `_toc.json` and
 * `_package.json` into `markdownPath`, routes release notes through the
 * release-notes helpers, and saves the images referenced by the pages.
 *
 * @param htmlPath - Folder containing the Sphinx HTML to convert.
 * @param markdownPath - Folder used to derive page URLs and to hold
 *   `_toc.json`, `_package.json` and `release-notes/index.md`.
 * @param pkg - The package (and version) whose docs are being generated.
 */
async function convertHtmlToMarkdown(
htmlPath: string,
markdownPath: string,
pkg: Pkg,
) {
// Only load the objects inventory when the package reports having one;
// otherwise this stays undefined and the optional calls below are no-ops.
const maybeObjectsInv = await (pkg.hasObjectsInv()
? ObjectsInv.fromFile(htmlPath)
: undefined);
// Collect the HTML pages to convert; paths are relative to `htmlPath`.
const files = await globby(
[
"apidocs/**.html",
"apidoc/**.html",
"stubs/**.html",
"release_notes.html",
],
{
cwd: htmlPath,
},
);

// Each result carries the converted page plus the docs URL it maps to.
let results: Array<HtmlToMdResult & { url: string }> = [];
for (const file of files) {
const html = await readFile(join(htmlPath, file), "utf-8");
const result = await sphinxHtmlToMarkdown({
html,
fileName: file,
determineGithubUrl: pkg.determineGithubUrlFn(),
imageDestination: pkg.outputDir("/images"),
releaseNotesTitle: `${pkg.title} ${pkg.versionWithoutPatch} release notes`,
});

// Avoid creating an empty markdown file for HTML files without content
// (e.g. HTML redirects)
if (result.markdown == "") {
continue;
}

// Build the URL from the output location relative to the docs root,
// dropping the file extension.
const { dir, name } = parse(`${markdownPath}/${file}`);
let url = `/${relative(`${getRoot()}/docs`, dir)}/${name}`;
results.push({ ...result, url });

if (
pkg.isLatest() &&
pkg.hasSeparateReleaseNotes &&
file.endsWith("release_notes.html")
) {
// NOTE(review): the return value is not awaited here — confirm whether
// addNewReleaseNotes is asynchronous.
addNewReleaseNotes(pkg);
}
}

// De-duplicate the images referenced across all pages by file name.
const allImages = uniqBy(
results.flatMap((result) => result.images),
(image) => image.fileName,
);

// Make sure the directory of every result's markdown path exists.
const dirsNeeded = uniq(
results.map((result) => parse(urlToPath(result.url)).dir),
);
for (const dir of dirsNeeded) {
await mkdirp(dir);
}

// Post-processing steps over the converted results.
// NOTE(review): several of these take the same `results` array and
// presumably mutate it in place, so the order likely matters — confirm
// before reordering.
results = await mergeClassMembers(results);
flattenFolders(results);
specialCaseResults(results);
await updateLinks(results, maybeObjectsInv);
await dedupeHtmlIdsFromResults(results);
addFrontMatter(results, pkg);

await maybeObjectsInv?.write(pkg.outputDir("public"));
for (const result of results) {
let path = urlToPath(result.url);

// Historical or dev versions with a single release notes file should not
// modify the current API's file.
if (
!pkg.hasSeparateReleaseNotes &&
!pkg.isLatest() &&
path.endsWith("release-notes.md")
) {
continue;
}

// Dev versions haven't been released yet and we don't want to modify the release notes
// of prior versions
if (pkg.isDev() && path.endsWith("release-notes.md")) {
continue;
}

if (pkg.hasSeparateReleaseNotes && path.endsWith("release-notes.md")) {
// Historical versions live under a versioned URL prefix.
const baseUrl = pkg.isHistorical()
? `/api/${pkg.name}/${pkg.versionWithoutPatch}`
: `/api/${pkg.name}`;

// Convert the relative links to absolute links
result.markdown = transformLinks(result.markdown, (link, _) =>
link.startsWith("http") || link.startsWith("#") || link.startsWith("/")
? link
: `${baseUrl}/${link}`,
);

// The release note is written by the helper instead of to `path`.
await writeReleaseNoteForVersion(pkg, result.markdown);
continue;
}

await writeFile(path, result.markdown);
}

console.log("Generating toc");
const toc = generateToc(pkg, results);
await writeFile(
`${markdownPath}/_toc.json`,
JSON.stringify(toc, null, 2) + "\n",
);

// Add the new release entry to the _toc.json for all historical API versions.
// We don't need to add any entries in projects with a single release notes file.
if (pkg.isLatest() && pkg.hasSeparateReleaseNotes) {
await updateHistoricalTocFiles(pkg);
}

// Same condition as above: only the latest version of a package with
// separate release notes gets a release-notes index.
if (pkg.isLatest() && pkg.hasSeparateReleaseNotes) {
console.log("Generating release-notes/index");
const markdown = generateReleaseNotesIndex(pkg);
await writeFile(`${markdownPath}/release-notes/index.md`, markdown);
}

console.log("Generating version file");
const pkg_json = { name: pkg.name, version: pkg.version };
await writeFile(
`${markdownPath}/_package.json`,
JSON.stringify(pkg_json, null, 2) + "\n",
);

// For historical versions, images are only saved when the `_images` folder
// is actually present; non-historical versions always attempt the save.
if (!pkg.isHistorical() || (await pathExists(`${htmlPath}/_images`))) {
// Some historical versions don't have the `_images` folder in the artifact store in Box (https://ibm.ent.box.com/folder/246867452622)
console.log("Saving images");
await saveImages(allImages, `${htmlPath}/_images`, pkg);
}
}

function urlToPath(url: string) {
return `${getRoot()}/docs${url}.md`;
return outputDir;
}
Loading

0 comments on commit a26a851

Please sign in to comment.