Skip to content

feat: Parsing blocks from external HTML #414

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/src/api/nodeConversions/nodeConversions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ export function blockToNode<BSchema extends BlockSchema>(
/**
* Converts an internal (prosemirror) content node to a BlockNote InlineContent array.
*/
function contentNodeToInlineContent(contentNode: Node) {
export function contentNodeToInlineContent(contentNode: Node) {
const content: InlineContent[] = [];
let currentContent: InlineContent | undefined = undefined;

Expand Down
24 changes: 21 additions & 3 deletions packages/core/src/api/serialization/clipboardHandlerExtension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ import { BlockSchema } from "../../extensions/Blocks/api/blockTypes";
import { BlockNoteEditor } from "../../BlockNoteEditor";
import { Extension } from "@tiptap/core";
import { Plugin } from "prosemirror-state";
import { Fragment, Slice } from "prosemirror-model";
import { createInternalHTMLSerializer } from "./html/internalHTMLSerializer";
import { createExternalHTMLExporter } from "./html/externalHTMLExporter";
import { markdown } from "../formatConversions/formatConversions";
import { blockToNode } from "../nodeConversions/nodeConversions";
import { parseExternalHTML } from "./html/parseExternalHTML";

const acceptedMIMETypes = [
"blocknote/html",
Expand Down Expand Up @@ -73,9 +76,24 @@ export const createClipboardHandlerExtension = <BSchema extends BlockSchema>(
}

if (format !== null) {
editor._tiptapEditor.view.pasteHTML(
event.clipboardData!.getData(format!)
);
// Use custom parser if only text/html is available (i.e. when
// pasting content from external source)
if (format === "text/html") {
const html = event.clipboardData!.getData(format);
const parsedNodes = parseExternalHTML(html, editor).map(
(block) => blockToNode(block, schema)
);

editor._tiptapEditor.view.dispatch(
editor._tiptapEditor.view.state.tr.replaceSelection(
new Slice(Fragment.from(parsedNodes), 0, 0)
)
);
} else {
editor._tiptapEditor.view.pasteHTML(
event.clipboardData!.getData(format!)
);
}
}

return true;
Expand Down
198 changes: 196 additions & 2 deletions packages/core/src/api/serialization/html/htmlConversion.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { uploadToTmpFilesDotOrg_DEV_ONLY } from "../../../extensions/Blocks/node
import { createInternalHTMLSerializer } from "./internalHTMLSerializer";
import { createExternalHTMLExporter } from "./externalHTMLExporter";
import { defaultProps } from "../../../extensions/Blocks/api/defaultProps";
import { parseExternalHTML } from "./parseExternalHTML";

// This is a modified version of the default image block that does not implement
// a `serialize` function. It's used to test if the custom serializer by default
Expand Down Expand Up @@ -99,7 +100,7 @@ function convertToHTMLAndCompareSnapshots(
const serializer = createInternalHTMLSerializer(tt.schema, editor);
const internalHTML = serializer.serializeBlocks(blocks);
const internalHTMLSnapshotPath =
"./__snapshots__/" +
"./__snapshots__/copy/" +
snapshotDirectory +
"/" +
snapshotName +
Expand All @@ -109,7 +110,7 @@ function convertToHTMLAndCompareSnapshots(
const exporter = createExternalHTMLExporter(tt.schema, editor);
const externalHTML = exporter.exportBlocks(blocks);
const externalHTMLSnapshotPath =
"./__snapshots__/" +
"./__snapshots__/copy/" +
snapshotDirectory +
"/" +
snapshotName +
Expand Down Expand Up @@ -454,3 +455,196 @@ describe("Convert custom blocks with non-exported inline content to HTML", () =>
convertToHTMLAndCompareSnapshots(blocks, "simpleCustomParagraph", "nested");
});
});

/**
 * Parses an external HTML string with `parseExternalHTML` and compares the
 * JSON-serialized result against the file snapshot stored at
 * `./__snapshots__/paste/<snapshotName>.json`.
 *
 * The returned promise settles once the snapshot comparison has finished, so
 * callers must `await` it; otherwise a mismatch may surface after the calling
 * test has already completed.
 *
 * @param html - External HTML markup to parse.
 * @param snapshotName - Snapshot file name, without the `.json` extension.
 * @returns A promise that resolves when the snapshot assertion has run.
 */
async function parseHTMLAndCompareSnapshots(
  html: string,
  snapshotName: string
): Promise<void> {
  const blocks = parseExternalHTML(html, editor);

  const snapshotPath = "./__snapshots__/paste/" + snapshotName + ".json";
  // `toMatchFileSnapshot` touches the file system and returns a promise, so
  // the assertion has to be awaited to be attributed to the current test.
  await expect(JSON.stringify(blocks)).toMatchFileSnapshot(snapshotPath);
}

// Snapshot tests for parsing HTML that did not originate from the editor.
// Each case feeds a hand-written HTML string to the parser and compares the
// resulting blocks with a stored JSON snapshot.
describe("Parse HTML", () => {
  // Headings, a paragraph, a figure with caption, and styled inline text.
  it("Parse basic block types", async () => {
    const html = `<h1>Heading 1</h1>
<h2>Heading 2</h2>
<h3>Heading 3</h3>
<p>Paragraph</p>
<figure><img src="exampleURL"><figcaption>Image Caption</figcaption></figure>
<p>None <strong>Bold </strong><em>Italic </em><u>Underline </u><s>Strikethrough </s><strong><em><s><u>All</u></s></em></strong></p>`;

    await parseHTMLAndCompareSnapshots(html, "parse-basic-block-types");
  });

  // List items whose text sits directly inside the <li> element.
  it("Parse nested lists", async () => {
    const html = `<ul>
<li>
Bullet List Item
<ul>
<li>
Nested Bullet List Item
</li>
<li>
Nested Bullet List Item
</li>
</ul>
</li>
<li>
Bullet List Item
</li>
</ul>
<ol>
<li>
Numbered List Item
<ol>
<li>
Nested Numbered List Item
</li>
<li>
Nested Numbered List Item
</li>
</ol>
</li>
<li>
Numbered List Item
</li>
</ol>`;

    await parseHTMLAndCompareSnapshots(html, "parse-nested-lists");
  });

  // Same structure as above, but each item's text is wrapped in a <p>.
  it("Parse nested lists with paragraphs", async () => {
    const html = `<ul>
<li>
<p>Bullet List Item</p>
<ul>
<li>
<p>Nested Bullet List Item</p>
</li>
<li>
<p>Nested Bullet List Item</p>
</li>
</ul>
</li>
<li>
<p>Bullet List Item</p>
</li>
</ul>
<ol>
<li>
<p>Numbered List Item</p>
<ol>
<li>
<p>Nested Numbered List Item</p>
</li>
<li>
<p>Nested Numbered List Item</p>
</li>
</ol>
</li>
<li>
<p>Numbered List Item</p>
</li>
</ol>`;

    await parseHTMLAndCompareSnapshots(
      html,
      "parse-nested-lists-with-paragraphs"
    );
  });

  // Numbered lists nested in bullet lists and vice versa.
  it("Parse mixed nested lists", async () => {
    const html = `<ul>
<li>
Bullet List Item
<ol>
<li>
Nested Numbered List Item
</li>
<li>
Nested Numbered List Item
</li>
</ol>
</li>
<li>
Bullet List Item
</li>
</ul>
<ol>
<li>
Numbered List Item
<ul>
<li>
<p>Nested Bullet List Item</p>
</li>
<li>
<p>Nested Bullet List Item</p>
</li>
</ul>
</li>
<li>
Numbered List Item
</li>
</ol>`;

    await parseHTMLAndCompareSnapshots(html, "parse-mixed-nested-lists");
  });

  // Plain and nested <div> elements, with and without their own text.
  it("Parse divs", async () => {
    const html = `<div>Single Div</div>
<div>
Div
<div>Nested Div</div>
<div>Nested Div</div>
</div>
<div>Single Div</div>
<div>
<div>Nested Div</div>
<div>Nested Div</div>
</div>`;

    await parseHTMLAndCompareSnapshots(html, "parse-divs");
  });

  // An image followed by a paragraph inside a <div>, i.e. a caption that is
  // not marked up with <figure>/<figcaption>.
  it("Parse fake image caption", async () => {
    const html = `<div>
<img src="exampleURL">
<p>Image Caption</p>
</div>`;

    await parseHTMLAndCompareSnapshots(html, "parse-fake-image-caption");
  });

  // Block content wrapped in four levels of <div>, each with text before and
  // after the nested content.
  it("Parse deep nested content", async () => {
    const html = `<div>
Outer 1 Div Before
<div>
Outer 2 Div Before
<div>
Outer 3 Div Before
<div>
Outer 4 Div Before
<h1>Heading 1</h1>
<h2>Heading 2</h2>
<h3>Heading 3</h3>
<p>Paragraph</p>
<figure><img src="exampleURL"><figcaption>Image Caption</figcaption></figure>
<p><strong>Bold</strong> <em>Italic</em> <u>Underline</u> <s>Strikethrough</s> <strong><em><s><u>All</u></s></em></strong></p>
Outer 4 Div After
</div>
Outer 3 Div After
</div>
Outer 2 Div After
</div>
Outer 1 Div After
</div>`;

    await parseHTMLAndCompareSnapshots(html, "parse-deep-nested-content");
  });

  // A <div> that mixes styled inline text with nested block elements.
  it("Parse div with inline content and nested blocks", async () => {
    const html = `<div>
None <strong>Bold </strong><em>Italic </em><u>Underline </u><s>Strikethrough </s><strong><em><s><u>All</u></s></em></strong>
<div>Nested Div</div>
<p>Nested Paragraph</p>
</div>`;

    await parseHTMLAndCompareSnapshots(html, "parse-div-with-inline-content");
  });
});
Loading