From 6782f57b482ad3dcfa8bb90a4ea8a4d0249613da Mon Sep 17 00:00:00 2001 From: David Ortner Date: Fri, 13 Dec 2024 02:19:57 +0100 Subject: [PATCH] chore: [#1615] Continues on implementation --- package-lock.json | 2 +- packages/happy-dom/package.json | 1 - packages/happy-dom/src/dom/DOMTokenList.ts | 2 +- .../happy-dom/src/html-parser/HTMLParser.ts | 37 +- .../src/html-serializer/HTMLSerializer.ts | 17 +- .../happy-dom/src/nodes/document/Document.ts | 2 +- .../src/nodes/element/HTMLCollection.ts | 2 +- .../src/nodes/element/NamedNodeMap.ts | 2 +- .../nodes/element/NamedNodeMapProxyFactory.ts | 2 +- .../html-form-element/HTMLFormElement.ts | 2 +- .../nodes/html-media-element/TextTrackList.ts | 2 +- .../html-select-element/HTMLSelectElement.ts | 2 +- packages/happy-dom/src/nodes/node/NodeList.ts | 2 +- packages/happy-dom/src/storage/Storage.ts | 2 +- packages/happy-dom/src/svg/SVGLengthList.ts | 2 +- packages/happy-dom/src/svg/SVGNumberList.ts | 2 +- packages/happy-dom/src/svg/SVGPointList.ts | 2 +- packages/happy-dom/src/svg/SVGStringList.ts | 2 +- .../happy-dom/src/svg/SVGTransformList.ts | 2 +- .../src/{ => utilities}/ClassMethodBinder.ts | 0 .../src/{ => utilities}/StringUtility.ts | 0 .../src/utilities/XMLEncodeUtility.ts | 73 ++ .../happy-dom/src/xml-parser/XMLParser.ts | 78 ++- .../src/xml-serializer/XMLSerializer.ts | 66 +- .../test/html-parser/HTMLParser.test.ts | 632 +++++++++++------- .../test/xml-parser/XMLParser.test.ts | 164 +---- .../test/xml-serializer/XMLSerializer.test.ts | 290 +++++++- 27 files changed, 936 insertions(+), 454 deletions(-) rename packages/happy-dom/src/{ => utilities}/ClassMethodBinder.ts (100%) rename packages/happy-dom/src/{ => utilities}/StringUtility.ts (100%) create mode 100644 packages/happy-dom/src/utilities/XMLEncodeUtility.ts diff --git a/package-lock.json b/package-lock.json index a57a3c83..678639bd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4892,6 +4892,7 @@ "version": "4.5.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=0.12" @@ -11798,7 +11799,6 @@ "version": "0.0.0", "license": "MIT", "dependencies": { - "entities": "^4.5.0", "webidl-conversions": "^7.0.0", "whatwg-mimetype": "^3.0.0" }, diff --git a/packages/happy-dom/package.json b/packages/happy-dom/package.json index f7877cc2..9a817ab1 100644 --- a/packages/happy-dom/package.json +++ b/packages/happy-dom/package.json @@ -77,7 +77,6 @@ "test:debug": "vitest run --inspect-brk --no-file-parallelism" }, "dependencies": { - "entities": "^4.5.0", "webidl-conversions": "^7.0.0", "whatwg-mimetype": "^3.0.0" }, diff --git a/packages/happy-dom/src/dom/DOMTokenList.ts b/packages/happy-dom/src/dom/DOMTokenList.ts index 9d3a1551..12ff8add 100644 --- a/packages/happy-dom/src/dom/DOMTokenList.ts +++ b/packages/happy-dom/src/dom/DOMTokenList.ts @@ -1,4 +1,4 @@ -import ClassMethodBinder from '../ClassMethodBinder.js'; +import ClassMethodBinder from '../utilities/ClassMethodBinder.js'; import Element from '../nodes/element/Element.js'; import * as PropertySymbol from '../PropertySymbol.js'; diff --git a/packages/happy-dom/src/html-parser/HTMLParser.ts b/packages/happy-dom/src/html-parser/HTMLParser.ts index 0695dab0..ae5d54e3 100755 --- a/packages/happy-dom/src/html-parser/HTMLParser.ts +++ b/packages/happy-dom/src/html-parser/HTMLParser.ts @@ -7,15 +7,15 @@ import HTMLLinkElement from '../nodes/html-link-element/HTMLLinkElement.js'; import Node from '../nodes/node/Node.js'; import DocumentFragment from '../nodes/document-fragment/DocumentFragment.js'; import HTMLElementConfig from '../config/HTMLElementConfig.js'; -import * as Entities from 'entities'; import HTMLElementConfigContentModelEnum from '../config/HTMLElementConfigContentModelEnum.js'; import SVGElementConfig from '../config/SVGElementConfig.js'; -import StringUtility from '../StringUtility.js'; +import StringUtility from '../utilities/StringUtility.js'; import BrowserWindow from '../window/BrowserWindow.js'; import DocumentType from '../nodes/document-type/DocumentType.js'; import HTMLHeadElement from '../nodes/html-head-element/HTMLHeadElement.js'; import HTMLBodyElement from '../nodes/html-body-element/HTMLBodyElement.js'; import HTMLHtmlElement from '../nodes/html-html-element/HTMLHtmlElement.js'; +import XMLEncodeUtility from '../utilities/XMLEncodeUtility.js'; /** * Markup RegExp. @@ -55,6 +55,11 @@ const ATTRIBUTE_REGEXP = */ const DOCUMENT_TYPE_ATTRIBUTE_REGEXP = /"([^"]+)"/gm; +/** + * Space RegExp. + */ +const SPACE_REGEXP = /\s+/; + /** * Space in the beginning of string RegExp. */ @@ -235,7 +240,6 @@ export default class HTMLParser { // If "nextElement" is set to null, the tag is not allowed (, and are not allowed in an HTML fragment or to be nested). this.currentNode = this.rootNode; this.readState = MarkupReadStateEnum.startOrEndTag; - this.startTagIndex = this.markupRegExp.lastIndex; } } break; @@ -283,7 +287,7 @@ export default class HTMLParser { : text; if (htmlText) { - const textNode = document.createTextNode(Entities.decodeHTML(htmlText)); + const textNode = document.createTextNode(XMLEncodeUtility.decodeTextContent(htmlText)); if ( this.currentNode === head && level === HTMLDocumentStructureLevelEnum.additionalHeadWithoutBody @@ -306,7 +310,7 @@ export default class HTMLParser { } } } else { - const textNode = document.createTextNode(Entities.decodeHTML(text)); + const textNode = document.createTextNode(XMLEncodeUtility.decodeTextContent(text)); this.currentNode[PropertySymbol.appendChild](textNode, true); } } @@ -339,11 +343,11 @@ export default class HTMLParser { const name = attributeMatch[1] || attributeMatch[3] || attributeMatch[6] || attributeMatch[9] || ''; const rawValue = attributeMatch[2] || attributeMatch[4] || attributeMatch[7] || ''; - const value = rawValue ? Entities.decodeHTMLAttribute(rawValue) : ''; + const value = rawValue ? XMLEncodeUtility.decodeAttributeValue(rawValue) : ''; const attributes = this.nextElement[PropertySymbol.attributes]; if (this.nextElement[PropertySymbol.namespaceURI] === NamespaceURI.svg) { - // In the SVG namespaces (when not parsing as XML), the attribute "xmlns" should be set to the "http://www.w3.org/2000/xmlns/" namespace. + // In the SVG namespace, the attribute "xmlns" should be set to the "http://www.w3.org/2000/xmlns/" namespace. const namespaceURI = name === 'xmlns' ? NamespaceURI.xmlns : null; if (!attributes.getNamedItemNS(namespaceURI, name)) { @@ -452,6 +456,8 @@ export default class HTMLParser { ? MarkupReadStateEnum.plainTextContent : MarkupReadStateEnum.startOrEndTag; } + + this.startTagIndex = this.markupRegExp.lastIndex; } /** @@ -460,9 +466,14 @@ export default class HTMLParser { * @param tagName Tag name. */ private parseEndTag(tagName: string): void { - // We close all tags up until the first tag that matches the end tag. - const index = this.tagNameStack.lastIndexOf(StringUtility.asciiUpperCase(tagName)); + // SVG elements are case-sensitive. + const name = + this.currentNode[PropertySymbol.namespaceURI] === NamespaceURI.html + ? StringUtility.asciiUpperCase(tagName) + : SVGElementConfig[StringUtility.asciiLowerCase(tagName)]?.localName || tagName; + const index = this.tagNameStack.lastIndexOf(name); + // We close all tags up until the first tag that matches the end tag. if (index !== -1) { this.nodeStack.splice(index, this.nodeStack.length - index); this.tagNameStack.splice(index, this.tagNameStack.length - index); @@ -477,7 +488,7 @@ export default class HTMLParser { */ private parseComment(comment: string): void { const document = this.window.document; - const commentNode = document.createComment(Entities.decodeHTML(comment)); + const commentNode = document.createComment(XMLEncodeUtility.decodeTextContent(comment)); if (this.documentStructure) { const level = this.documentStructure.level; @@ -511,7 +522,7 @@ export default class HTMLParser { * @param text Text. */ private parseDocumentType(text: string): void { - const decodedText = Entities.decodeHTML(text); + const decodedText = XMLEncodeUtility.decodeTextContent(text); if (this.documentStructure) { const { doctype } = this.documentStructure.nodes; @@ -565,7 +576,7 @@ export default class HTMLParser { // Plain text elements such as @@ -367,14 +386,14 @@ describe('HTMLParser', () => { ` ); - expect((root.children[0].children[0]).innerText).toBe( + expect((result.children[0].children[0]).innerText).toBe( `if(11){console.log("1")}` ); - expect((root.children[0].children[1]).innerText).toBe(''); - expect((root.children[0].children[2]).innerText).toBe(''); + expect((result.children[0].children[1]).innerText).toBe(''); + expect((result.children[0].children[2]).innerText).toBe(''); - expect(new HTMLSerializer().serializeToString(root)).toBe( + expect(new HTMLSerializer().serializeToString(result)).toBe( `
@@ -382,7 +401,7 @@ describe('HTMLParser', () => {
` ); - const root2 = new HTMLParser(window, { mode: HTMLParserModeEnum.htmlDocument }).parse( + const root2 = new HTMLParser(window).parse( ` Title @@ -390,7 +409,8 @@ describe('HTMLParser', () => { - ` + `, + document.implementation.createHTMLDocument() ); expect((root2.children[0].children[1].children[0]).innerText).toBe( 'var vars = []; for (var i=0;i { }); it('Handles unclosed regular elements.', () => { - const root = new HTMLParser(window).parse(`
test`); + const result = new HTMLParser(window).parse(`
test`); - expect(root.childNodes.length).toBe(1); - expect((root.childNodes[0]).tagName).toBe('DIV'); - expect(root.childNodes[0].childNodes[0].nodeType).toBe(Node.TEXT_NODE); + expect(result.childNodes.length).toBe(1); + expect((result.childNodes[0]).tagName).toBe('DIV'); + expect(result.childNodes[0].childNodes[0].nodeType).toBe(Node.TEXT_NODE); }); - it('Parses an SVG with "xmlns" set to HTML.', () => { - const root = new HTMLParser(window).parse( + it('Parses an SVG with "xmlns" set to SVG.', () => { + const result = new HTMLParser(window).parse( `
- + @@ -423,7 +443,41 @@ describe('HTMLParser', () => { ` ); - const div = root.children[0]; + expect(new HTMLSerializer().serializeToString(result)).toBe( + ` +
+ + + + + + + + + + +
+ ` + ); + + expect(new XMLSerializer().serializeToString(result)).toBe( + ` +
+ + + + + + + + + + +
+ ` + ); + + const div = result.children[0]; const svg = div.children[0]; const circle = svg.children[0]; @@ -456,7 +510,7 @@ describe('HTMLParser', () => { expect(svg.attributes[2].ownerDocument === document).toBe(true); expect(svg.attributes[3].name).toBe('xmlns'); - expect(svg.attributes[3].value).toBe(NamespaceURI.html); + expect(svg.attributes[3].value).toBe(NamespaceURI.svg); expect(svg.attributes[3].namespaceURI).toBe(NamespaceURI.xmlns); expect(svg.attributes[3].specified).toBe(true); expect(svg.attributes[3].ownerElement === svg).toBe(true); @@ -484,13 +538,32 @@ describe('HTMLParser', () => { expect(svg.attributes['fill'].ownerDocument === document).toBe(true); expect(svg.attributes['xmlns'].name).toBe('xmlns'); - expect(svg.attributes['xmlns'].value).toBe(NamespaceURI.html); + expect(svg.attributes['xmlns'].value).toBe(NamespaceURI.svg); expect(svg.attributes['xmlns'].namespaceURI).toBe(NamespaceURI.xmlns); expect(svg.attributes['xmlns'].specified).toBe(true); expect(svg.attributes['xmlns'].ownerElement === svg).toBe(true); expect(svg.attributes['xmlns'].ownerDocument === document).toBe(true); + }); - expect(new HTMLSerializer().serializeToString(root)).toBe( + it('Parses an SVG with "xmlns" set to HTML.', () => { + const result = new HTMLParser(window).parse( + ` +
+ + + + + + + + + + +
+ ` + ); + + expect(new HTMLSerializer().serializeToString(result)).toBe( `
@@ -506,10 +579,77 @@ describe('HTMLParser', () => {
` ); + + const div = result.children[0]; + const svg = div.children[0]; + const circle = svg.children[0]; + + expect(div.namespaceURI).toBe(NamespaceURI.html); + expect(svg.namespaceURI).toBe(NamespaceURI.svg); + expect(circle.namespaceURI).toBe(NamespaceURI.svg); + + // Attributes should be in lower-case now as the namespace is HTML + expect(svg.attributes.length).toBe(4); + + expect(svg.attributes[0].name).toBe('viewBox'); + expect(svg.attributes[0].value).toBe('0 0 300 100'); + expect(svg.attributes[0].namespaceURI).toBe(null); + expect(svg.attributes[0].specified).toBe(true); + expect(svg.attributes[0].ownerElement === svg).toBe(true); + expect(svg.attributes[0].ownerDocument === document).toBe(true); + + expect(svg.attributes[1].name).toBe('stroke'); + expect(svg.attributes[1].value).toBe('red'); + expect(svg.attributes[1].namespaceURI).toBe(null); + expect(svg.attributes[1].specified).toBe(true); + expect(svg.attributes[1].ownerElement === svg).toBe(true); + expect(svg.attributes[1].ownerDocument === document).toBe(true); + + expect(svg.attributes[2].name).toBe('fill'); + expect(svg.attributes[2].value).toBe('grey'); + expect(svg.attributes[2].namespaceURI).toBe(null); + expect(svg.attributes[2].specified).toBe(true); + expect(svg.attributes[2].ownerElement === svg).toBe(true); + expect(svg.attributes[2].ownerDocument === document).toBe(true); + + expect(svg.attributes[3].name).toBe('xmlns'); + expect(svg.attributes[3].value).toBe(NamespaceURI.html); + expect(svg.attributes[3].namespaceURI).toBe(NamespaceURI.xmlns); + expect(svg.attributes[3].specified).toBe(true); + expect(svg.attributes[3].ownerElement === svg).toBe(true); + expect(svg.attributes[3].ownerDocument === document).toBe(true); + + expect(svg.attributes['viewBox'].name).toBe('viewBox'); + expect(svg.attributes['viewBox'].value).toBe('0 0 300 100'); + expect(svg.attributes['viewBox'].namespaceURI).toBe(null); + expect(svg.attributes['viewBox'].specified).toBe(true); + expect(svg.attributes['viewBox'].ownerElement === svg).toBe(true); + expect(svg.attributes['viewBox'].ownerDocument === document).toBe(true); + + expect(svg.attributes['stroke'].name).toBe('stroke'); + expect(svg.attributes['stroke'].value).toBe('red'); + expect(svg.attributes['stroke'].namespaceURI).toBe(null); + expect(svg.attributes['stroke'].specified).toBe(true); + expect(svg.attributes['stroke'].ownerElement === svg).toBe(true); + expect(svg.attributes['stroke'].ownerDocument === document).toBe(true); + + expect(svg.attributes['fill'].name).toBe('fill'); + expect(svg.attributes['fill'].value).toBe('grey'); + expect(svg.attributes['fill'].namespaceURI).toBe(null); + expect(svg.attributes['fill'].specified).toBe(true); + expect(svg.attributes['fill'].ownerElement === svg).toBe(true); + expect(svg.attributes['fill'].ownerDocument === document).toBe(true); + + expect(svg.attributes['xmlns'].name).toBe('xmlns'); + expect(svg.attributes['xmlns'].value).toBe(NamespaceURI.html); + expect(svg.attributes['xmlns'].namespaceURI).toBe(NamespaceURI.xmlns); + expect(svg.attributes['xmlns'].specified).toBe(true); + expect(svg.attributes['xmlns'].ownerElement === svg).toBe(true); + expect(svg.attributes['xmlns'].ownerDocument === document).toBe(true); }); it('Parses a malformed SVG with "xmlns" set to HTML.', () => { - const root = new HTMLParser(window).parse( + const result = new HTMLParser(window).parse( `
@@ -532,7 +672,7 @@ describe('HTMLParser', () => { ` ); - expect(new HTMLSerializer().serializeToString(root)).toBe( + expect(new HTMLSerializer().serializeToString(result)).toBe( `
@@ -554,24 +694,47 @@ describe('HTMLParser', () => {
` ); + + expect(new XMLSerializer().serializeToString(result)).toBe( + ` +
+ + + + + + + + + + + + + + + + +
+ ` + ); }); it('Parses childless elements with start and end tag names in different case', () => { - const root = new HTMLParser(window).parse( + const result = new HTMLParser(window).parse( ` ` ); - expect((root.children[0]).innerText).toBe(`console.log('hello')`); + expect((result.children[0]).innerText).toBe(`console.log('hello')`); }); it('Handles different value types.', () => { const root1 = new HTMLParser(window).parse((null)); - expect(new HTMLSerializer().serializeToString(root1)).toBe(''); + expect(new HTMLSerializer().serializeToString(root1)).toBe('null'); const root2 = new HTMLParser(window).parse((undefined)); - expect(new HTMLSerializer().serializeToString(root2)).toBe(''); + expect(new HTMLSerializer().serializeToString(root2)).toBe('undefined'); const root3 = new HTMLParser(window).parse((1000)); expect(new HTMLSerializer().serializeToString(root3)).toBe('1000'); @@ -613,8 +776,8 @@ describe('HTMLParser', () => { ]; for (const html of testHTML) { - const root = new HTMLParser(window, { mode: HTMLParserModeEnum.htmlFragment }).parse(html); - expect(new HTMLSerializer().serializeToString(root)).toBe(html); + const result = new HTMLParser(window).parse(html); + expect(new HTMLSerializer().serializeToString(result)).toBe(html); } }); @@ -662,23 +825,26 @@ describe('HTMLParser', () => { '' + ''; - const root = new HTMLParser(window, { mode: HTMLParserModeEnum.htmlDocument }).parse(html); - expect(new HTMLSerializer().serializeToString(root)).toBe(expected); + const result = new HTMLParser(window).parse( + html, + document.implementation.createHTMLDocument() + ); + expect(new HTMLSerializer().serializeToString(result)).toBe(expected); }); it('Parses comments with dash in them.', () => { - const root = new HTMLParser(window).parse(''); - expect(root.childNodes.length).toBe(1); - expect(root.childNodes[0].nodeType).toBe(NodeTypeEnum.commentNode); - expect(root.childNodes[0].nodeValue).toBe(' comment with - in - it '); + const result = new HTMLParser(window).parse(''); + expect(result.childNodes.length).toBe(1); + expect(result.childNodes[0].nodeType).toBe(NodeTypeEnum.commentNode); + expect(result.childNodes[0].nodeValue).toBe(' comment with - in - it '); }); it('Parses