Extract markdown reading into dedicated file (Qiskit#1666)

We're going to reuse this logic in Qiskit#1651, so it's helpful to have it in a common helper file.
frankharkins · Jul 10, 2024 · 5706804 · 5706804
1 parent 735b1d6
commit 5706804
Show file tree

Hide file tree

Showing 4 changed files with 86 additions and 53 deletions.
diff --git a/scripts/lib/links/extractLinks.test.ts b/scripts/lib/links/extractLinks.test.ts
@@ -11,42 +11,7 @@
 // that they have been altered from the originals.
 
 import { expect, test } from "@jest/globals";
-import { markdownFromNotebook, parseAnchors, parseLinks } from "./extractLinks";
-
-test("markdownFromNotebook()", () => {
-  const result = markdownFromNotebook(`
-    {
-        "cells": [
-            {
-                "attachments": {},
-                "cell_type": "markdown",
-                "metadata": {},
-                "source": [
-                    "Line 1.\\n",
-                    "Line 2."
-                ]
-            },
-            {
-                "cell_type": "code",
-                "execution_count": 1,
-                "metadata": {},
-                "outputs": [],
-                "source": []
-            },
-            {
-                "attachments": {},
-                "cell_type": "markdown",
-                "metadata": {},
-                "source": [
-                    "Line 3."
-                ]
-            }
-        ],
-        "metadata": {}
-    }
-  `);
-  expect(result).toBe("Line 1.\nLine 2.\n\nLine 3.");
-});
+import { parseAnchors, parseLinks } from "./extractLinks";
 
 test("parseAnchors()", () => {
   const result = parseAnchors(`

diff --git a/scripts/lib/links/extractLinks.ts b/scripts/lib/links/extractLinks.ts
@@ -10,7 +10,6 @@
 // copyright notice, and modified files need to carry a notice indicating
 // that they have been altered from the originals.
 
-import { readFile } from "fs/promises";
 import path from "node:path";
 
 import markdownLinkExtractor from "markdown-link-extractor";
@@ -22,6 +21,7 @@ import rehypeRemark from "rehype-remark";
 import rehypeParse from "rehype-parse";
 import remarkGfm from "remark-gfm";
 import { ObjectsInv } from "../api/objectsInv";
+import { readMarkdown } from "../markdownReader";
 import { removePrefix, removeSuffix } from "../stringUtils";
 import { getRoot } from "../fs";
 
@@ -34,19 +34,6 @@ export type ParsedFile = {
   externalLinks: Set<string>;
 };
 
-interface JupyterCell {
-  cell_type: string;
-  source: string[];
-}
-
-export function markdownFromNotebook(rawContent: string): string {
-  const cells = JSON.parse(rawContent).cells as JupyterCell[];
-  return cells
-    .filter((cell) => cell.cell_type === "markdown")
-    .map((cell) => cell.source.join(""))
-    .join("\n\n");
-}
-
 export function parseAnchors(markdown: string): Set<string> {
   // Anchors generated from markdown titles.
   const mdAnchors = markdownLinkExtractor(markdown).anchors;
@@ -109,9 +96,7 @@ export async function parseFile(filePath: string): Promise<ParsedFile> {
     };
   }
 
-  const source = await readFile(filePath, { encoding: "utf8" });
-  const markdown =
-    path.extname(filePath) === ".ipynb" ? markdownFromNotebook(source) : source;
+  const markdown = await readMarkdown(filePath);
   const [internalLinks, externalLinks] = await parseLinks(markdown);
   return { anchors: parseAnchors(markdown), internalLinks, externalLinks };
 }
diff --git a/scripts/lib/markdownReader.test.ts b/scripts/lib/markdownReader.test.ts
@@ -0,0 +1,49 @@
+// This code is a Qiskit project.
+//
+// (C) Copyright IBM 2024.
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+import { expect, test } from "@jest/globals";
+import { markdownFromNotebook } from "./markdownReader";
+
+test("markdownFromNotebook()", () => {
+  const result = markdownFromNotebook(`
+    {
+        "cells": [
+            {
+                "attachments": {},
+                "cell_type": "markdown",
+                "metadata": {},
+                "source": [
+                    "Line 1.\\n",
+                    "Line 2."
+                ]
+            },
+            {
+                "cell_type": "code",
+                "execution_count": 1,
+                "metadata": {},
+                "outputs": [],
+                "source": []
+            },
+            {
+                "attachments": {},
+                "cell_type": "markdown",
+                "metadata": {},
+                "source": [
+                    "Line 3."
+                ]
+            }
+        ],
+        "metadata": {}
+    }
+  `);
+  expect(result).toBe("Line 1.\nLine 2.\n\nLine 3.");
+});
diff --git a/scripts/lib/markdownReader.ts b/scripts/lib/markdownReader.ts
@@ -0,0 +1,34 @@
+// This code is a Qiskit project.
+//
+// (C) Copyright IBM 2024.
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+import { readFile } from "fs/promises";
+import path from "node:path";
+
+export async function readMarkdown(filePath: string): Promise<string> {
+  const source = await readFile(filePath, { encoding: "utf8" });
+  return path.extname(filePath) === ".ipynb"
+    ? markdownFromNotebook(source)
+    : source;
+}
+
+interface JupyterCell {
+  cell_type: string;
+  source: string[];
+}
+
+export function markdownFromNotebook(rawContent: string): string {
+  const cells = JSON.parse(rawContent).cells as JupyterCell[];
+  return cells
+    .filter((cell) => cell.cell_type === "markdown")
+    .map((cell) => cell.source.join(""))
+    .join("\n\n");
+}