diff --git a/package-lock.json b/package-lock.json index 5d66fe00002..4fbfbf88f53 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,9 @@ "name": "docs", "version": "0.1.0", "license": "Apache-2.0", + "dependencies": { + "markdown-link-extractor": "^3.1.0" + }, "devDependencies": { "@swc/jest": "^0.2.29", "@types/jest": "^29.5.5", @@ -2931,8 +2934,7 @@ "node_modules/boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", - "dev": true + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" }, "node_modules/brace-expansion": { "version": "1.1.11", @@ -3147,7 +3149,6 @@ "version": "1.0.0-rc.12", "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz", "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==", - "dev": true, "dependencies": { "cheerio-select": "^2.1.0", "dom-serializer": "^2.0.0", @@ -3168,7 +3169,6 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", - "dev": true, "dependencies": { "boolbase": "^1.0.0", "css-select": "^5.1.0", @@ -3713,7 +3713,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", - "dev": true, "dependencies": { "boolbase": "^1.0.0", "css-what": "^6.1.0", @@ -3729,7 +3728,6 @@ "version": "6.1.0", "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", - "dev": true, "engines": { "node": 
">= 6" }, @@ -3851,7 +3849,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", - "dev": true, "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.2", @@ -3865,7 +3862,6 @@ "version": "2.3.0", "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", - "dev": true, "funding": [ { "type": "github", @@ -3877,7 +3873,6 @@ "version": "5.0.3", "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", - "dev": true, "dependencies": { "domelementtype": "^2.3.0" }, @@ -3892,7 +3887,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", - "dev": true, "dependencies": { "dom-serializer": "^2.0.0", "domelementtype": "^2.3.0", @@ -3951,7 +3945,6 @@ "version": "4.5.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "dev": true, "engines": { "node": ">=0.12" }, @@ -4753,11 +4746,18 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "node_modules/html-link-extractor": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/html-link-extractor/-/html-link-extractor-1.0.5.tgz", + "integrity": "sha512-ADd49pudM157uWHwHQPUSX4ssMsvR/yHIswOR5CUfBdK9g9ZYGMhVSE6KZVHJ6kCkR0gH4htsfzU6zECDNVwyw==", + "dependencies": { + "cheerio": "^1.0.0-rc.10" + } + }, 
"node_modules/htmlparser2": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", - "dev": true, "funding": [ "https://github.com/fb55/htmlparser2?sponsor=1", { @@ -6219,6 +6219,15 @@ "integrity": "sha512-CkYQrPYZfWnu/DAmVCpTSX/xHpKZ80eKh2lAkyA6AJTef6bW+6JpbQZN5rofum7da+SyN1bi5ctTm+lTfcCW3g==", "dev": true }, + "node_modules/markdown-link-extractor": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/markdown-link-extractor/-/markdown-link-extractor-3.1.0.tgz", + "integrity": "sha512-r0NEbP1dsM+IqB62Ru9TXLP/HDaTdBNIeylYXumuBi6Xv4ufjE1/g3TnslYL8VNqNcGAGbMptQFHrrdfoZ/Sug==", + "dependencies": { + "html-link-extractor": "^1.0.5", + "marked": "^4.1.0" + } + }, "node_modules/markdown-table": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.3.tgz", @@ -6229,6 +6238,17 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/marked": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz", + "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 12" + } + }, "node_modules/mdast": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mdast/-/mdast-3.0.0.tgz", @@ -7413,7 +7433,6 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", - "dev": true, "dependencies": { "boolbase": "^1.0.0" }, @@ -7578,7 +7597,6 @@ "version": "7.1.2", "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", - "dev": true, 
"dependencies": { "entities": "^4.4.0" }, @@ -7590,7 +7608,6 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz", "integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==", - "dev": true, "dependencies": { "domhandler": "^5.0.2", "parse5": "^7.0.0" diff --git a/package.json b/package.json index 94f1b31e27e..bf8e21c0699 100644 --- a/package.json +++ b/package.json @@ -44,5 +44,8 @@ "unist-util-visit": "^4.0.0", "yargs": "^17.7.2", "zx": "^7.2.3" + }, + "dependencies": { + "markdown-link-extractor": "^3.1.0" } } diff --git a/scripts/commands/checkLinks.ts b/scripts/commands/checkLinks.ts new file mode 100644 index 00000000000..c936163c238 --- /dev/null +++ b/scripts/commands/checkLinks.ts @@ -0,0 +1,140 @@ +// This code is a Qiskit project. +// +// (C) Copyright IBM 2023. +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. 
import { globby } from "globby";
import { existsSync, readFileSync } from 'fs';
import path from 'node:path';
import markdownLinkExtractor from 'markdown-link-extractor';

// Directory that root-relative links (those starting with '/') are resolved against.
const DOCS_ROOT = "./docs"
// Extensions tried when mapping an extension-less link to a file on disk.
const CONTENT_FILE_EXTENSIONS = [".md", ".mdx", ".ipynb"]

/**
 * A single link found in a source file, together with the file it came from.
 */
class Link {
  /** Link target with any `#fragment` removed. */
  readonly value: string
  /** Fragment of the link including the leading `#`, or '' when there is none. */
  readonly anchor: string
  /** Path to the source file containing the link. */
  readonly origin: string
  /** True when the link starts with `http://` or `https://`. */
  readonly isExternal: boolean

  /**
   * @param linkString Link as it appears in the source file
   * @param origin Path to the source file containing the link
   */
  constructor(linkString: string, origin: string) {
    // Split on the first '#' only. (`split('#', 1)` would cap the result at a
    // single element and the anchor would always come out empty.)
    const hashIndex = linkString.indexOf('#')
    this.value = hashIndex === -1 ? linkString : linkString.slice(0, hashIndex)
    this.anchor = hashIndex === -1 ? '' : linkString.slice(hashIndex)
    this.origin = origin
    // Require the scheme separator so that relative targets which merely
    // begin with "http" (e.g. `http-api/page`) are still checked on disk.
    this.isExternal = /^https?:\/\//i.test(linkString)
  }

  /**
   * Return the list of possible paths this link could resolve to.
   *
   * - External links resolve to themselves.
   * - Anchor-only links resolve to the origin file.
   * - Links starting with `/images` resolve under `public/`.
   * - Anything else resolves to one candidate per content extension, both for
   *   the path itself and for an `index` file inside it.
   */
  resolve(): string[] {
    if (this.isExternal) { return [this.value] }
    if (this.value === '') { return [this.origin] } // link is just anchor
    if (this.value.startsWith("/images")) {
      return [path.join("public/", this.value)]
    }

    const joinedPath = this.value.startsWith('/')
      // Path is relative to DOCS_ROOT
      ? path.join(DOCS_ROOT, this.value)
      // Path is relative to origin file
      : path.join(path.dirname(this.origin), this.value)
    // Remove trailing '/' left over from path.join
    const baseFilePath = joinedPath.replace(/\/$/, '')

    // File may have one of many extensions (.md, .ipynb etc.), and/or be a
    // directory with an index file (e.g. `docs/build` should resolve to
    // `docs/build/index.mdx`). We return a list of possible filenames.
    const possibleFilePaths: string[] = []
    for (const index of ['', '/index']) {
      for (const extension of CONTENT_FILE_EXTENSIONS) {
        possibleFilePaths.push(baseFilePath + index + extension)
      }
    }
    return possibleFilePaths
  }

  /**
   * True if the link points to an existing file, otherwise false. A broken
   * link is reported on stdout.
   *
   * @param filePathCache Array of known existing files (to reduce disk I/O)
   */
  check(filePathCache: string[] = []): boolean {
    if (this.isExternal) {
      // External link checking not supported yet
      return true
    }

    const possiblePaths = this.resolve()
    if (possiblePaths.some((filePath) => filePathCache.includes(filePath))) {
      return true
    }
    // Check disk for files not in cache (images etc.)
    if (possiblePaths.some((filePath) => existsSync(filePath))) {
      return true
    }

    console.log(`❌ ${this.origin}: Could not find link '${this.value}'`)
    return false
  }
}

// Only the two notebook cell fields this script reads.
type NotebookCell = { cell_type?: unknown; source?: unknown }

/**
 * Concatenate the text of every Markdown cell in a notebook.
 *
 * @param source Raw JSON text of an `.ipynb` file
 * @returns Text of all cells whose `cell_type` is 'markdown', separated by
 *   blank lines; '' when the JSON has no `cells` array
 * @throws SyntaxError if `source` is not valid JSON
 */
function markdownFromNotebook(source: string): string {
  const notebook = JSON.parse(source) as { cells?: unknown } | null
  const cells = notebook?.cells
  if (!Array.isArray(cells)) { return '' }

  const markdownCells: string[] = []
  for (const cell of cells as NotebookCell[]) {
    // Select on the cell's type; `source` holds the cell's text, not its type.
    if (cell?.cell_type !== 'markdown') { continue }
    const cellSource = cell.source
    if (Array.isArray(cellSource)) {
      // `source` may be stored as a list of line strings
      markdownCells.push(cellSource.join(''))
    } else if (typeof cellSource === 'string') {
      markdownCells.push(cellSource)
    }
  }
  // Blank line between cells so text from adjacent cells cannot fuse together
  return markdownCells.join('\n\n')
}

/**
 * Check every link in one source file.
 *
 * @param filePath Path of the file to scan
 * @param filePaths Known existing files, passed through to `Link.check`
 * @returns True when every link in the file checks out
 */
function checkLinksInFile(filePath: string, filePaths: string[]): boolean {
  const source = readFileSync(filePath, {encoding: 'utf8'})
  const markdown = (path.extname(filePath) === '.ipynb') ? markdownFromNotebook(source) : source
  // Extract from the Markdown text, not from the raw notebook JSON
  const links = markdownLinkExtractor(markdown).links.map((x: string) => new Link(x, filePath))

  let allGood = true
  for (const link of links) {
    // `check` goes first so every broken link is reported, not just the first
    allGood = link.check(filePaths) && allGood
  }
  return allGood
}

/**
 * Check links in every content file under `docs/`; exit with status 1 when
 * any link appears broken.
 */
async function main(): Promise<void> {
  const filePaths = await globby('docs/**/*.{ipynb,md,mdx}')
  let allGood = true
  for (const sourceFile of filePaths) {
    allGood = checkLinksInFile(sourceFile, filePaths) && allGood
  }
  if (!allGood) {
    console.log("\nSome links appear broken 💔\n")
    process.exit(1)
  }
}

// Do not leave the promise floating: report unexpected failures and exit non-zero.
main().catch((error: unknown) => {
  console.error(error)
  process.exit(1)
})

// ---------------------------------------------------------------------------
// scripts/commands/declarations.d.ts (new file)
// ---------------------------------------------------------------------------

type LinkExtractionResult = {
  links: string[];
  anchors: string[];
}

declare module 'markdown-link-extractor' {
  function markdownLinkExtractor(markdown: string): LinkExtractionResult;
  export = markdownLinkExtractor;
}