Skip to content

Commit

Permalink
Extract markdown reading into dedicated file (Qiskit#1666)
Browse files Browse the repository at this point in the history
We're going to reuse this logic in
Qiskit#1651, so it's helpful to
have it in a common helper file.
  • Loading branch information
Eric-Arellano authored Jul 10, 2024
1 parent 735b1d6 commit 5706804
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 53 deletions.
37 changes: 1 addition & 36 deletions scripts/lib/links/extractLinks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,7 @@
// that they have been altered from the originals.

import { expect, test } from "@jest/globals";
import { markdownFromNotebook, parseAnchors, parseLinks } from "./extractLinks";

// Verifies that only the markdown cells of a notebook are kept: the `source`
// fragments of each cell are concatenated, the code cell is dropped, and the
// remaining cells are separated by a blank line.
test("markdownFromNotebook()", () => {
  const notebookJson = `
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Line 1.\\n",
"Line 2."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Line 3."
]
}
],
"metadata": {}
}
`;
  const markdown = markdownFromNotebook(notebookJson);
  expect(markdown).toBe("Line 1.\nLine 2.\n\nLine 3.");
});
import { parseAnchors, parseLinks } from "./extractLinks";

test("parseAnchors()", () => {
const result = parseAnchors(`
Expand Down
19 changes: 2 additions & 17 deletions scripts/lib/links/extractLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { readFile } from "fs/promises";
import path from "node:path";

import markdownLinkExtractor from "markdown-link-extractor";
Expand All @@ -22,6 +21,7 @@ import rehypeRemark from "rehype-remark";
import rehypeParse from "rehype-parse";
import remarkGfm from "remark-gfm";
import { ObjectsInv } from "../api/objectsInv";
import { readMarkdown } from "../markdownReader";
import { removePrefix, removeSuffix } from "../stringUtils";
import { getRoot } from "../fs";

Expand All @@ -34,19 +34,6 @@ export type ParsedFile = {
externalLinks: Set<string>;
};

/** Minimal shape of a Jupyter notebook cell: only the fields read below. */
interface JupyterCell {
  cell_type: string;
  // The notebook format allows multi-line text to be stored either as a
  // single string or as a list of strings.
  source: string | string[];
}

/** Returns the text of one cell, whichever of the two allowed forms it uses. */
function joinCellSource(source: unknown): string {
  if (typeof source === "string") {
    return source;
  }
  if (Array.isArray(source)) {
    return source.join("");
  }
  throw new TypeError(
    "Invalid notebook: markdown cell `source` must be a string or an array of strings",
  );
}

/**
 * Extracts the markdown text from the raw JSON content of a Jupyter notebook.
 *
 * Only cells whose `cell_type` is `"markdown"` are kept. The text of each
 * cell is taken from its `source`, and the cells are joined with a blank line
 * between them.
 *
 * @param rawContent - The notebook file's content as a JSON string.
 * @returns The concatenated markdown; an empty string when the notebook has
 *   no markdown cells.
 * @throws SyntaxError if `rawContent` is not valid JSON.
 * @throws TypeError if the parsed value has no top-level `cells` array, or a
 *   markdown cell's `source` is neither a string nor an array.
 */
export function markdownFromNotebook(rawContent: string): string {
  const notebook: unknown = JSON.parse(rawContent);
  const cells: unknown =
    typeof notebook === "object" && notebook !== null
      ? (notebook as { cells?: unknown }).cells
      : undefined;
  // Fail with a descriptive message instead of an opaque
  // "cannot read properties of undefined" further down.
  if (!Array.isArray(cells)) {
    throw new TypeError("Invalid notebook: expected a top-level `cells` array");
  }
  return (cells as JupyterCell[])
    .filter((cell) => cell.cell_type === "markdown")
    .map((cell) => joinCellSource(cell.source))
    .join("\n\n");
}

export function parseAnchors(markdown: string): Set<string> {
// Anchors generated from markdown titles.
const mdAnchors = markdownLinkExtractor(markdown).anchors;
Expand Down Expand Up @@ -109,9 +96,7 @@ export async function parseFile(filePath: string): Promise<ParsedFile> {
};
}

const source = await readFile(filePath, { encoding: "utf8" });
const markdown =
path.extname(filePath) === ".ipynb" ? markdownFromNotebook(source) : source;
const markdown = await readMarkdown(filePath);
const [internalLinks, externalLinks] = await parseLinks(markdown);
return { anchors: parseAnchors(markdown), internalLinks, externalLinks };
}
49 changes: 49 additions & 0 deletions scripts/lib/markdownReader.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { expect, test } from "@jest/globals";
import { markdownFromNotebook } from "./markdownReader";

// Verifies that only the markdown cells of a notebook are kept: the `source`
// fragments of each cell are concatenated, the code cell is dropped, and the
// remaining cells are separated by a blank line.
test("markdownFromNotebook()", () => {
  const notebookJson = `
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Line 1.\\n",
"Line 2."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Line 3."
]
}
],
"metadata": {}
}
`;
  const markdown = markdownFromNotebook(notebookJson);
  expect(markdown).toBe("Line 1.\nLine 2.\n\nLine 3.");
});
34 changes: 34 additions & 0 deletions scripts/lib/markdownReader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { readFile } from "fs/promises";
import path from "node:path";

/**
 * Reads the file at `filePath` as UTF-8 text and returns its markdown.
 *
 * A file whose extension is `.ipynb` is passed through
 * `markdownFromNotebook()`; any other file's content is returned unchanged.
 *
 * @param filePath - Path of the file to read.
 * @returns The markdown text of the file.
 */
export async function readMarkdown(filePath: string): Promise<string> {
  const contents = await readFile(filePath, { encoding: "utf8" });
  const isNotebook = path.extname(filePath) === ".ipynb";
  if (isNotebook) {
    return markdownFromNotebook(contents);
  }
  return contents;
}

/** Minimal shape of a Jupyter notebook cell: only the fields read below. */
interface JupyterCell {
  cell_type: string;
  // The notebook format allows multi-line text to be stored either as a
  // single string or as a list of strings.
  source: string | string[];
}

/** Returns the text of one cell, whichever of the two allowed forms it uses. */
function joinCellSource(source: unknown): string {
  if (typeof source === "string") {
    return source;
  }
  if (Array.isArray(source)) {
    return source.join("");
  }
  throw new TypeError(
    "Invalid notebook: markdown cell `source` must be a string or an array of strings",
  );
}

/**
 * Extracts the markdown text from the raw JSON content of a Jupyter notebook.
 *
 * Only cells whose `cell_type` is `"markdown"` are kept. The text of each
 * cell is taken from its `source`, and the cells are joined with a blank line
 * between them.
 *
 * @param rawContent - The notebook file's content as a JSON string.
 * @returns The concatenated markdown; an empty string when the notebook has
 *   no markdown cells.
 * @throws SyntaxError if `rawContent` is not valid JSON.
 * @throws TypeError if the parsed value has no top-level `cells` array, or a
 *   markdown cell's `source` is neither a string nor an array.
 */
export function markdownFromNotebook(rawContent: string): string {
  const notebook: unknown = JSON.parse(rawContent);
  const cells: unknown =
    typeof notebook === "object" && notebook !== null
      ? (notebook as { cells?: unknown }).cells
      : undefined;
  // Fail with a descriptive message instead of an opaque
  // "cannot read properties of undefined" further down.
  if (!Array.isArray(cells)) {
    throw new TypeError("Invalid notebook: expected a top-level `cells` array");
  }
  return (cells as JupyterCell[])
    .filter((cell) => cell.cell_type === "markdown")
    .map((cell) => joinCellSource(cell.source))
    .join("\n\n");
}

0 comments on commit 5706804

Please sign in to comment.