diff --git a/.changeset/tidy-tigers-grab.md b/.changeset/tidy-tigers-grab.md new file mode 100644 index 0000000..8511960 --- /dev/null +++ b/.changeset/tidy-tigers-grab.md @@ -0,0 +1,5 @@ +--- +'@graphcms/html-to-slate-ast': minor +--- + +Update Slate; refactor types; fix pre tag handling; wrap parentless breaks in a paragraph; do not add thead to headless tables diff --git a/packages/html-to-slate-ast/package.json b/packages/html-to-slate-ast/package.json index e5234b0..65bbb16 100644 --- a/packages/html-to-slate-ast/package.json +++ b/packages/html-to-slate-ast/package.json @@ -12,14 +12,14 @@ }, "peerDependencies": { "jsdom": "^16.6.0", - "slate": "^0.58.3", - "slate-hyperscript": "^0.58.3" + "slate": "^0.65.3", + "slate-hyperscript": "^0.62.0" }, "devDependencies": { "@types/jsdom": "^16.2.11", "jsdom": "^16.6.0", - "slate": "^0.58.3", - "slate-hyperscript": "^0.58.3" + "slate": "^0.65.3", + "slate-hyperscript": "^0.62.0" }, "publishConfig": { "access": "public" diff --git a/packages/html-to-slate-ast/src/index.ts b/packages/html-to-slate-ast/src/index.ts index 05e925e..e9734e6 100644 --- a/packages/html-to-slate-ast/src/index.ts +++ b/packages/html-to-slate-ast/src/index.ts @@ -1,3 +1,4 @@ +import { BaseElement, Descendant, Text as SlateText } from 'slate'; import { jsx } from 'slate-hyperscript'; import { sanitizeUrl } from '@braintree/sanitize-url'; import type { Element, Mark } from '@graphcms/rich-text-types'; @@ -53,7 +54,7 @@ const ELEMENT_TAGS: Record< openInNewTab: true, }; }, - PRE: () => ({ type: 'pre' }), + PRE: () => ({ type: 'code-block' }), }; const TEXT_TAGS: Record< @@ -67,13 +68,21 @@ const TEXT_TAGS: Record< U: () => ({ underline: true }), }; +function deserialize( + el: Node +): string | ChildNode[] | BaseElement | Descendant[]; function deserialize(el: Node) { if (el.nodeType === 3) { return el.textContent; } else if (el.nodeType !== 1) { return null; } else if (el.nodeName === 'BR') { - return '\n'; + // wrap parentless breaks in a paragraph + if (el.parentElement?.nodeName === 'BODY') { + return jsx('element', { type: 'paragraph' }, [{ text: '' }]); + } else { + return '\n'; + } } const { nodeName } = el; @@ -86,14 +95,13 @@ function deserialize(el: Node) { ) { parent = el.childNodes[0]; } - let children = Array.from(parent.childNodes) - .map(deserialize) - .flat() as ChildNode[]; + let children = Array.from(parent.childNodes).map(deserialize).flat(); if (children.length === 0) { - if (!['COLGROUP', 'COL', 'CAPTION', 'TFOOT'].includes(nodeName)) - // @ts-expect-error - children = [{ text: '' }]; + if (!['COLGROUP', 'COL', 'CAPTION', 'TFOOT'].includes(nodeName)) { + const textNode = jsx('text', {}, ''); + children = [textNode]; + } } if (el.nodeName === 'BODY') { return jsx('fragment', {}, children); @@ -135,23 +143,20 @@ function deserialize(el: Node) { const attrs = ELEMENT_TAGS[nodeName](el as HTMLElement); // li children must be rendered in spans, like in list plugin if (nodeName === 'LI') { - const listItemChildren = children.map((child: ChildNode) => ({ - ...child, - type: 'list-item-child', - })); + const listItemChildren = children.map((child) => { + if (typeof child === 'string') { + return jsx('element', { type: 'list-item-child' }, [child]); + } else if (SlateText.isText(child)) { + return jsx('element', { type: 'list-item-child' }, [child.text]); + } else if (isChildNode(child)) { + return jsx('element', { type: 'list-item-child' }, [ + child.textContent, + ]); + } else { + return { ...child, type: 'list-item-child' }; + } + }); return jsx('element', attrs, listItemChildren); - } else if ( - nodeName === 'TABLE' && - !Array.from((el as HTMLTableElement).childNodes).find( - (node: ChildNode) => node.nodeName === 'THEAD' - ) - ) { - // tables must have thead, otherwise field crashes - const thead = { - type: 'table_head', - children: [], - }; - return jsx('element', attrs, [thead, ...children]); } else if (nodeName === 'TR') { // if TR is empty, insert a cell with a paragraph to ensure selection can be placed inside const modifiedChildren = @@ -231,13 +236,13 @@ function deserialize(el: Node) { })(); if (tagName) { const attrs = TEXT_TAGS[tagName](); - return children.map((child: ChildNode) => jsx('text', attrs, child)); + return children.map((child) => jsx('text', attrs, child)); } } if (TEXT_TAGS[nodeName]) { const attrs = TEXT_TAGS[nodeName](el as HTMLElement); - return children.map((child: ChildNode) => jsx('text', attrs, child)); + return children.map((child) => jsx('text', attrs, child)); } // general fallback @@ -321,6 +326,10 @@ function isTextNode(node: Node): node is Text { return node.nodeType === 3; } +function isChildNode(node: string | ChildNode | Descendant): node is ChildNode { + return node instanceof Node; +} + function isInlineElement(element: HTMLElement) { const allInlineElements: Array = [ 'a', @@ -390,6 +399,9 @@ const parseDomDocument = async (normalizedHTML: string) => { } }; +export function htmlToSlateAST( + html: string +): Promise; export async function htmlToSlateAST(html: string) { const normalizedHTML = normalizeHtml(html); const domDocument = await parseDomDocument(normalizedHTML); diff --git a/packages/html-to-slate-ast/test/google-docs_input.html b/packages/html-to-slate-ast/test/google-docs_input.html index b8022cc..dec1d36 100644 --- a/packages/html-to-slate-ast/test/google-docs_input.html +++ b/packages/html-to-slate-ast/test/google-docs_input.html @@ -1 +1 @@ -

Heading 1

Heading 2

Heading 3

Heading 4

Heading 5
Heading 6

Link to GH 

Unordered list:

  • One

  • Two

  • Three

Ordered list:

  1. One

  2. Two

Table:

Cell one

Cell two



\ No newline at end of file +

Heading 1

Heading 2

Heading 3

Heading 4

Heading 5
Heading 6

Link to Google 

Unordered list:

  • One

  • Two

  • Three

Ordered list:

  1. One

  2. Two

Table:

Cell one

Cell two



Screenshot 2021-06-10 at 15.56.22.png


\ No newline at end of file diff --git a/packages/html-to-slate-ast/test/index.test.ts b/packages/html-to-slate-ast/test/index.test.ts index f135688..d73df60 100644 --- a/packages/html-to-slate-ast/test/index.test.ts +++ b/packages/html-to-slate-ast/test/index.test.ts @@ -146,17 +146,17 @@ test('Transforms Google Docs input', () => { children: [ { type: 'link', - href: 'https://github.com/GraphCMS/next-webapp/pull/1034', + href: 'https://www.google.com/', openInNewTab: false, children: [ { - text: 'Link to GH', + text: 'Link to Google', underline: true, }, ], }, { - text: ' ', + text: '\u00a0', }, ], }, @@ -262,10 +262,6 @@ test('Transforms Google Docs input', () => { { type: 'table', children: [ - { - type: 'table_head', - children: [], - }, { type: 'table_body', children: [ @@ -335,6 +331,30 @@ test('Transforms Google Docs input', () => { }, ], }, + { + type: 'paragraph', + children: [ + { + type: 'link', + href: 'https://lh6.googleusercontent.com/TkJFBZvkyXTa602F0gkp2phU0O1eHu96RdKFcQ8l_EOS_CBfcI9jYRixN6sNRFnFiZ-ssbLbnLDReb3FrEZ1MnLr70c5gIvPmhJtV7appyVEDSeHLIRdNwdNzbIqs3l2GOgGLGC5=s0', + title: 'Screenshot 2021-06-10 at 15.56.22.png', + openInNewTab: true, + children: [ + { + text: 'https://lh6.googleusercontent.com/TkJFBZvkyXTa602F0gkp2phU0O1eHu96RdKFcQ8l_EOS_CBfcI9jYRixN6sNRFnFiZ-ssbLbnLDReb3FrEZ1MnLr70c5gIvPmhJtV7appyVEDSeHLIRdNwdNzbIqs3l2GOgGLGC5=s0', + }, + ], + }, + ], + }, + { + type: 'paragraph', + children: [ + { + text: '', + }, + ], + }, ]) ); }); @@ -461,7 +481,6 @@ test('Converts word documents', () => { { type: 'table', children: [ - { type: 'table_head', children: [] }, { type: 'table_body', children: [ @@ -539,10 +558,6 @@ test('Reshape an incorrectly structured table', () => { { type: 'table', children: [ - { - type: 'table_head', - children: [], - }, { type: 'table_body', children: [ @@ -589,3 +604,19 @@ test('Reshape an incorrectly structured table', () => { ]) ); }); + +test('Transforms pre tags into code-block nodes', () => { + const input = fs.readFileSync(__dirname + '/pre.html').toString(); + return htmlToSlateAST(input).then((ast) => + expect(ast).toStrictEqual([ + { + type: 'code-block', + children: [ + { + text: " L TE\n A A\n C V\n R A\n DOU\n LOU\n REUSE\n QUE TU\n PORTES\n ET QUI T'\n ORNE O CI\n VILISÉ\n OTE- TU VEUX\n LA BIEN\n SI RESPI\n RER - Apollinaire", + }, + ], + }, + ]) + ); +}); diff --git a/packages/html-to-slate-ast/test/pre.html b/packages/html-to-slate-ast/test/pre.html new file mode 100644 index 0000000..0b90db9 --- /dev/null +++ b/packages/html-to-slate-ast/test/pre.html @@ -0,0 +1,17 @@ +
+  L          TE
+    A       A
+      C    V
+       R A
+       DOU
+       LOU
+      REUSE
+      QUE TU
+      PORTES
+    ET QUI T'
+    ORNE O CI
+     VILISÉ
+    OTE-  TU VEUX
+     LA    BIEN
+    SI      RESPI
+            RER       - Apollinaire
\ No newline at end of file diff --git a/yarn.lock b/yarn.lock index d3df5e0..afdc761 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5904,7 +5904,7 @@ extsprintf@^1.2.0: resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.0.tgz#e2689f8f356fad62cca65a3a91c5df5f9551692f" integrity sha1-4mifjzVvrWLMplo6kcXfX5VRaS8= -fast-deep-equal@^3.1.1: +fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: version "3.1.3" resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== @@ -6804,10 +6804,10 @@ ignore@^5.1.4: resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57" integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw== -immer@^5.0.0: - version "5.3.6" - resolved "https://registry.yarnpkg.com/immer/-/immer-5.3.6.tgz#51eab8cbbeb13075fe2244250f221598818cac04" - integrity sha512-pqWQ6ozVfNOUDjrLfm4Pt7q4Q12cGw2HUZgry4Q5+Myxu9nmHRkWBpI0J4+MK0AxbdFtdMTwEGVl7Vd+vEiK+A== +immer@^8.0.1: + version "8.0.4" + resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.4.tgz#3a21605a4e2dded852fb2afd208ad50969737b7a" + integrity sha512-jMfL18P+/6P6epANRvRk6q8t+3gGhqsJ9EuJ25AXE+9bNTYtssvzeYbEd0mXRYWCmmXSIbnlpz6vd6iJlmGGGQ== import-fresh@^2.0.0: version "2.0.0" @@ -11148,21 +11148,22 @@ slash@^3.0.0: resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== -slate-hyperscript@^0.58.3: - version "0.58.4" - resolved "https://registry.yarnpkg.com/slate-hyperscript/-/slate-hyperscript-0.58.4.tgz#430b2fc93e6a04808b9ed18414f0a9fdbc8fec79" - integrity sha512-Ix+oSvqiJ6rBsO5Bh+0n3d4+bGl8+/Bj7okwKnQA9A4hj9x1K/5+wrEm8ivO9mQP5j3ZHO4bGxt2RWxROSp93Q== +slate-hyperscript@^0.62.0: + version "0.62.0" + resolved "https://registry.yarnpkg.com/slate-hyperscript/-/slate-hyperscript-0.62.0.tgz#ff6f86e94936fbf90f8ec07d2ae8fbcd7e45a927" + integrity sha512-PbtxrrIr4qPvtPmKli4/FnvQFjf8zmnF0kyNeDQGHvUzpHnsoSYHM/i4POye3/qt5d8WGukiJBPL03QsIW+SIw== dependencies: is-plain-object "^3.0.0" -slate@^0.58.3: - version "0.58.4" - resolved "https://registry.yarnpkg.com/slate/-/slate-0.58.4.tgz#4259387e632b45b00cf88bcecf5570d7d16ddd8b" - integrity sha512-XxKwNJgCMf7S2sDT8CVJy0zYm95MiYorJo9Hah05zKjItrw0VVeCc2BGKDZSlNGcaIfM3xcfFUN7XE+c8ehAbA== +slate@^0.65.3: + version "0.65.3" + resolved "https://registry.yarnpkg.com/slate/-/slate-0.65.3.tgz#8178cdf28a10a3a4e6858b13bc2ffa7c3d003e7a" + integrity sha512-n8wa2MKyWhCMRyVkXuMf67MmOYSeoHnqS1qYivor+/y0puNvQgXDUjC7TJJqUjhVqJ6zg2IeuYd0WfSYdAJs4g== dependencies: "@types/esrever" "^0.2.0" esrever "^0.2.0" - immer "^5.0.0" + fast-deep-equal "^3.1.3" + immer "^8.0.1" is-plain-object "^3.0.0" tiny-warning "^1.0.3"