From 8f29c78b07cb405af16a553f1f59ea9959832bc3 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Mon, 21 Feb 2022 23:42:17 +0900 Subject: [PATCH] Update the h2m CLI to find spec URLs and add spec-urls frontmatter key Part of https://github.com/mdn/content/issues/13126 --- content/document.js | 1 + kumascript/macros/RFC.ejs | 8 ++- markdown/cli.ts | 140 +++++++++++++++++++++++++++++++++++++- 3 files changed, 147 insertions(+), 2 deletions(-) diff --git a/content/document.js b/content/document.js index 2eb0662bbbcb..05996aa80d71 100644 --- a/content/document.js +++ b/content/document.js @@ -96,6 +96,7 @@ function saveFile(filePath, rawBody, metadata, frontMatterKeys = null) { "translation_of_original", "original_slug", "browser-compat", + "spec-urls", ]; const saveMetadata = {}; diff --git a/kumascript/macros/RFC.ejs b/kumascript/macros/RFC.ejs index 5926cbca3ea3..c656e520a594 100644 --- a/kumascript/macros/RFC.ejs +++ b/kumascript/macros/RFC.ejs @@ -12,7 +12,13 @@ // it, and the default text shown has ", section $2" appended to it. 
// -var link = "https://datatracker.ietf.org/doc/html/rfc" + $0; +var link = "https://www.rfc-editor.org/rfc/rfc" + $0; + +if ([6265,7230,7231,7232,7233,7234,7235,7538,7725,7838,8470] + .includes(parseInt($0))) { + link = "https://httpwg.org/specs/rfc" + $0 + ".html"; +} + var text = ""; var commonl10n = web.getJSONData('L10n-Common'); diff --git a/markdown/cli.ts b/markdown/cli.ts index 9858ed8b3d4e..33da26ae8300 100644 --- a/markdown/cli.ts +++ b/markdown/cli.ts @@ -1,5 +1,6 @@ import * as fs from "fs"; const fm = require("front-matter"); +const cheerio = require("cheerio"); import { program } from "@caporal/core"; import * as chalk from "chalk"; import * as cliProgress from "cli-progress"; @@ -8,11 +9,14 @@ import { saveFile } from "../content/document"; import { VALID_LOCALES } from "../libs/constants"; import { execGit } from "../content"; import { getRoot } from "../content/utils"; +import { render } from "../kumascript/src/render.js"; import { h2m } from "./h2m"; const { prettyAST } = require("./utils"); import { m2h } from "."; import { toSelector } from "./h2m/utils"; +const specs = require("browser-specs"); +const web = require("../kumascript/src/api/web.js"); function tryOrExit(f) { return async ({ @@ -132,6 +136,14 @@ program default: "all", validator: (Array.from(VALID_LOCALES.values()) as string[]).concat("all"), }) + .option("--prepare-spec-url-files", "Prepare files with spec URLs", { + default: false, + validator: program.BOOLEAN, + }) + .option("--add-spec-urls", "Adds spec URLs", { + default: false, + validator: program.BOOLEAN, + }) .argument("[folder]", "convert by folder") .action( tryOrExit(async ({ args, options }) => { @@ -168,7 +180,133 @@ program if (options.verbose) { console.log(doc.metadata.slug); } - const { body: h, attributes: metadata } = fm(doc.rawContent); + let { body: h, attributes: metadata } = fm(doc.rawContent); + const specURLs = []; + if (options.addSpecUrls || options.prepareSpecUrlFiles) { + const $ = 
cheerio.load(doc.rawBody); + const specTable = $("h2:contains('Specifications') + table"); + const tableCells = $("h2:contains('Specifications') + table td"); + for (const td of tableCells) { + if (td.children[0]) { + const tdData = td.children[0].data; + // Look for {{...}} in any Specifications table. + if ( + typeof tdData === "string" && + tdData.trim().match(/^{{.+}}$/) + ) { + const [result] = + // Render (resolve/expand) any {{..}} macro found. + await render(tdData, { slug: "", locale: "en-US" }); + const $ = cheerio.load(result); + // {{...}} macros that are spec references expand into + // ... elements + if ($("a")[0]) { + let href = $("a")[0].attribs.href; + href = href + .replace( + "www.w3.org/TR/wai-aria-1.1", + "w3c.github.io/aria" + ) + .replace( + "www.w3.org/TR/wai-aria-practices-1.2", + "w3c.github.io/aria-practices" + ) + .replace( + "www.w3.org/TR/WebCryptoAPI", + "w3c.github.io/webcrypto" + ) + .replace( + "heycam.github.io/webidl", + "webidl.spec.whatwg.org" + ) + .replace( + "wicg.github.io/InputDeviceCapabilities", + "wicg.github.io/input-device-capabilities" + ) + .replace( + "wicg.github.io/web-locks", + "w3c.github.io/web-locks" + ); + if (href && href.match("http[s]?://")) { + if (options.verbose) { + const spec = specs.find( + (spec: any) => + href.startsWith(spec.url) || + href.startsWith(spec.nightly.url) || + href.startsWith(spec.series.nightlyUrl) + ); + const specificationsData = { + bcdSpecificationURL: href, + title: "Unknown specification", + }; + if (spec) { + specificationsData.title = spec.title; + } + if ( + specificationsData.title === "Unknown specification" + ) { + const specList = web.getJSONData("SpecData"); + if ( + Object.keys(specList).find( + (key) => + specList[key]["url"] === href.split("#")[0] + ) + ) { + console.log( + chalk.red( + "⚠️ spec url not in browser-specs (but in SpecData): " + + href + ) + ); + } else { + console.log( + chalk.redBright( + "❌ spec url from unknown spec: " + href + ) + ); + } + } else 
{ + console.log(chalk.green("✅ spec url: " + href)); + } + } + specURLs.push(href); + } + } + } + } + } + let {} = ({ body: h, attributes: metadata } = fm(doc.rawContent)); + if (specURLs.length !== 0) { + if (options.addSpecUrls) { + // Only if the --add-spec-urls option (not the + // --prepare-spec-url-files option) was specified do we + // replace Specifications tables with {{Specifications}} + // macros, and add the spec-urls frontmatter key. + const p = $("

{{Specifications}}

"); + specTable.replaceWith(p); + h = $.html(); + if (metadata["browser-compat"]) { + console.log( + chalk.red( + "⚠️ browser-compat frontmatter key found;" + + " not adding spec-urls" + ) + ); + } else { + metadata["spec-urls"] = + // String, if only one spec URL; otherwise, array. + specURLs.length === 1 ? specURLs[0] : specURLs; + } + } + } else { + // --add-spec-urls or --prepare-spec-url-files was specified + // but because specURLs.length is zero, that means the + // current document we’re processing has no spec URLs, so + // skip it (don’t write any output for it), and move on to + // checking for spec URLs in the next document. + continue; + } + } const [markdown, { invalid, unhandled }] = await h2m(h, { printAST: options.printAst, locale: doc.metadata.locale,