diff --git a/src/Parser.spec.ts b/src/Parser.spec.ts index 5997860be..c8ac3d643 100644 --- a/src/Parser.spec.ts +++ b/src/Parser.spec.ts @@ -79,30 +79,6 @@ describe("API", () => { expect(text).toBe("0&#xn"); }); - test("should update the position", () => { - const p = new Parser(); - - p.write("foo"); - - expect(p.startIndex).toBe(0); - expect(p.endIndex).toBe(2); - - p.write(""); - - expect(p.startIndex).toBe(11); - expect(p.endIndex).toBe(18); - - p.parseChunk(""); - - expect(p.startIndex).toBe(19); - expect(p.endIndex).toBe(27); - }); - test("should not have the start index be greater than the end index", () => { const onopentag = jest.fn(); const onclosetag = jest.fn(); @@ -134,22 +110,33 @@ describe("API", () => { }); test("should update the position when a single tag is spread across multiple chunks", () => { - const p = new Parser(); + let called = false; + const p = new Parser({ + onopentag() { + called = true; + expect(p.startIndex).toBe(0); + expect(p.endIndex).toBe(12); + }, + }); p.write("
"); - expect(p.startIndex).toBe(0); - expect(p.endIndex).toBe(12); + expect(called).toBe(true); }); test("should have the correct position for implied opening tags", () => { - const p = new Parser(); + let called = false; + const p = new Parser({ + onopentag() { + called = true; + expect(p.startIndex).toBe(0); + expect(p.endIndex).toBe(3); + }, + }); p.write("

"); - - expect(p.startIndex).toBe(0); - expect(p.endIndex).toBe(3); + expect(called).toBe(true); }); test("should parse <__proto__> (#387)", () => { diff --git a/src/Parser.ts b/src/Parser.ts index d5c6e2a97..f628fb3bd 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -196,6 +196,11 @@ export class Parser { public startIndex = 0; /** The end index of the last event. */ public endIndex = 0; + /** + * Store the start index of the current open tag, + * so we can update the start index for attributes. + */ + private openTagStart = 0; private tagname = ""; private attribname = ""; @@ -212,7 +217,6 @@ export class Parser { cbs?: Partial | null, private readonly options: ParserOptions = {} ) { - this.options = options; this.cbs = cbs ?? {}; this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode; this.lowerCaseAttributeNames = @@ -224,24 +228,23 @@ export class Parser { this.cbs.onparserinit?.(this); } - private updatePosition(offset: number) { - this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset; - this.endIndex = this.tokenizer.getAbsoluteIndex(); - } - // Tokenizer event handlers + + /** @internal */ ontext(data: string): void { - this.startIndex = this.tokenizer.getAbsoluteSectionStart(); - this.endIndex = this.tokenizer.getAbsoluteIndex() - 1; + const idx = this.tokenizer.getAbsoluteIndex(); + this.endIndex = idx; this.cbs.ontext?.(data); + this.startIndex = idx; } protected isVoidElement(name: string): boolean { return !this.options.xmlMode && voidElements.has(name); } + /** @internal */ onopentagname(name: string): void { - this.updatePosition(1); + this.endIndex = this.tokenizer.getAbsoluteIndex(); if (this.lowerCaseTagNames) { name = name.toLowerCase(); @@ -251,6 +254,7 @@ export class Parser { } private emitOpenTag(name: string) { + this.openTagStart = this.startIndex; this.tagname = name; const impliesClose = @@ -277,7 +281,9 @@ export class Parser { if (this.cbs.onopentag) this.attribs = {}; } + /** @internal */ onopentagend(): void { + this.startIndex = this.openTagStart; this.endIndex = this.tokenizer.getAbsoluteIndex(); if (this.attribs) { @@ -287,11 +293,16 @@ export class Parser { if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) { this.cbs.onclosetag(this.tagname); } + this.tagname = ""; + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ onclosetag(name: string): void { - this.updatePosition(2); + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.lowerCaseTagNames) { name = name.toLowerCase(); } @@ -319,8 +330,12 @@ export class Parser { this.emitOpenTag(name); this.closeCurrentTag(); } + + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ onselfclosingtag(): void { if ( this.options.xmlMode || @@ -329,6 +344,7 @@ export class Parser { ) { this.closeCurrentTag(); } else { + // Ignore the fact that the tag is self-closing. this.onopentagend(); } } @@ -336,28 +352,36 @@ export class Parser { private closeCurrentTag() { const name = this.tagname; this.onopentagend(); - /* - * Self-closing tags will be on the top of the stack - * (cheaper check than in onclosetag) - */ + + // Self-closing tags will be on the top of the stack if (this.stack[this.stack.length - 1] === name) { + // Reset the start index + this.startIndex = this.openTagStart; + this.cbs.onclosetag?.(name); this.stack.pop(); } } + /** @internal */ onattribname(name: string): void { + this.startIndex = this.tokenizer.getAbsoluteSectionStart(); + if (this.lowerCaseAttributeNames) { name = name.toLowerCase(); } this.attribname = name; } + /** @internal */ onattribdata(value: string): void { this.attribvalue += value; } + /** @internal */ onattribend(quote: string | undefined | null): void { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + this.cbs.onattribute?.(this.attribname, this.attribvalue, quote); if ( this.attribs && @@ -380,47 +404,70 @@ export class Parser { return name; } + /** @internal */ ondeclaration(value: string): void { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.cbs.onprocessinginstruction) { - this.updatePosition(2); const name = this.getInstructionName(value); this.cbs.onprocessinginstruction(`!${name}`, `!${value}`); } + + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ onprocessinginstruction(value: string): void { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.cbs.onprocessinginstruction) { - this.updatePosition(2); const name = this.getInstructionName(value); this.cbs.onprocessinginstruction(`?${name}`, `?${value}`); } + + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ oncomment(value: string): void { - this.updatePosition(4); + this.endIndex = this.tokenizer.getAbsoluteIndex(); + this.cbs.oncomment?.(value); this.cbs.oncommentend?.(); + + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ oncdata(value: string): void { - this.updatePosition(9); + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.options.xmlMode || this.options.recognizeCDATA) { this.cbs.oncdatastart?.(); this.cbs.ontext?.(value); this.cbs.oncdataend?.(); } else { - this.oncomment(`[CDATA[${value}]]`); + this.cbs.oncomment?.(`[CDATA[${value}]]`); + this.cbs.oncommentend?.(); } + + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; } + /** @internal */ onerror(err: Error): void { this.cbs.onerror?.(err); } + /** @internal */ onend(): void { if (this.cbs.onclosetag) { - // Set start- and end indices for remaining tags - this.startIndex = this.endIndex = this.tokenizer.getAbsoluteIndex(); + // Set the end index for all remaining tags + this.endIndex = this.startIndex; for ( let i = this.stack.length; i > 0; @@ -440,6 +487,8 @@ export class Parser { this.attribname = ""; this.attribs = null; this.stack = []; + this.startIndex = 0; + this.endIndex = 0; this.cbs.onparserinit?.(this); } diff --git a/src/__fixtures__/Events/01-simple.json b/src/__fixtures__/Events/01-simple.json index c60ea3acf..aad23069b 100644 --- a/src/__fixtures__/Events/01-simple.json +++ b/src/__fixtures__/Events/01-simple.json @@ -10,6 +10,8 @@ }, { "event": "attribute", + "startIndex": 4, + "endIndex": 14, "data": ["class", "test", null] }, { @@ -25,6 +27,8 @@ }, { "event": "text", + "startIndex": 15, + "endIndex": 19, "data": ["adsf"] }, { diff --git a/src/__fixtures__/Events/02-template.json b/src/__fixtures__/Events/02-template.json index d75426e4b..57dc96e03 100644 --- a/src/__fixtures__/Events/02-template.json +++ b/src/__fixtures__/Events/02-template.json @@ -22,6 +22,8 @@ }, { "event": "attribute", + "startIndex": 11, + "endIndex": 30, "data": ["type", "text/template", "\""] }, { @@ -37,6 +39,8 @@ }, { "event": "text", + "startIndex": 32, + "endIndex": 49, "data": ["

Heading1

"] }, { diff --git a/src/__fixtures__/Events/03-lowercase_tags.json b/src/__fixtures__/Events/03-lowercase_tags.json index f68970940..fb86e89d7 100644 --- a/src/__fixtures__/Events/03-lowercase_tags.json +++ b/src/__fixtures__/Events/03-lowercase_tags.json @@ -15,6 +15,8 @@ }, { "event": "attribute", + "startIndex": 4, + "endIndex": 14, "data": ["class", "test", null] }, { @@ -30,6 +32,8 @@ }, { "event": "text", + "startIndex": 15, + "endIndex": 19, "data": ["adsf"] }, { diff --git a/src/__fixtures__/Events/04-cdata.json b/src/__fixtures__/Events/04-cdata.json index ad89b6950..872625745 100644 --- a/src/__fixtures__/Events/04-cdata.json +++ b/src/__fixtures__/Events/04-cdata.json @@ -27,6 +27,8 @@ }, { "event": "text", + "startIndex": 5, + "endIndex": 41, "data": [" asdf ><> fo"] }, { diff --git a/src/__fixtures__/Events/05-cdata-special.json b/src/__fixtures__/Events/05-cdata-special.json index 0837c835d..74f2b4224 100644 --- a/src/__fixtures__/Events/05-cdata-special.json +++ b/src/__fixtures__/Events/05-cdata-special.json @@ -16,6 +16,8 @@ }, { "event": "text", + "startIndex": 8, + "endIndex": 53, "data": ["/*<> fo/*]]>*/"] }, { diff --git a/src/__fixtures__/Events/06-leading-lt.json b/src/__fixtures__/Events/06-leading-lt.json index c1bee002b..0d5f0b0ca 100644 --- a/src/__fixtures__/Events/06-leading-lt.json +++ b/src/__fixtures__/Events/06-leading-lt.json @@ -4,6 +4,8 @@ "expected": [ { "event": "text", + "startIndex": 0, + "endIndex": 3, "data": [">a>"] } ] diff --git a/src/__fixtures__/Events/07-self-closing.json b/src/__fixtures__/Events/07-self-closing.json index 2536192bb..d6bea9444 100644 --- a/src/__fixtures__/Events/07-self-closing.json +++ b/src/__fixtures__/Events/07-self-closing.json @@ -10,6 +10,8 @@ }, { "event": "attribute", + "startIndex": 3, + "endIndex": 24, "data": ["href", "http://test.com/", null] }, { @@ -25,6 +27,8 @@ }, { "event": "text", + "startIndex": 25, + "endIndex": 28, "data": ["Foo"] }, { diff --git a/src/__fixtures__/Events/08-implicit-close-tags.json b/src/__fixtures__/Events/08-implicit-close-tags.json index e2c4ecc29..37e06f020 100644 --- a/src/__fixtures__/Events/08-implicit-close-tags.json +++ b/src/__fixtures__/Events/08-implicit-close-tags.json @@ -22,6 +22,8 @@ }, { "event": "attribute", + "startIndex": 8, + "endIndex": 18, "data": ["class", "test", null] }, { @@ -55,6 +57,8 @@ }, { "event": "attribute", + "startIndex": 31, + "endIndex": 47, "data": ["style", "width:100%", null] }, { @@ -94,6 +98,8 @@ }, { "event": "text", + "startIndex": 56, + "endIndex": 58, "data": ["TH"] }, { @@ -110,6 +116,8 @@ }, { "event": "attribute", + "startIndex": 62, + "endIndex": 71, "data": ["colspan", "2", null] }, { @@ -137,6 +145,8 @@ }, { "event": "text", + "startIndex": 76, + "endIndex": 83, "data": ["Heading"] }, { @@ -195,6 +205,8 @@ }, { "event": "text", + "startIndex": 101, + "endIndex": 104, "data": ["Div"] }, { @@ -235,6 +247,8 @@ }, { "event": "text", + "startIndex": 119, + "endIndex": 123, "data": ["Div2"] }, { @@ -311,6 +325,8 @@ }, { "event": "text", + "startIndex": 156, + "endIndex": 165, "data": ["Heading 2"] }, { @@ -351,6 +367,8 @@ }, { "event": "text", + "startIndex": 189, + "endIndex": 193, "data": ["Para"] }, { @@ -373,6 +391,8 @@ }, { "event": "text", + "startIndex": 197, + "endIndex": 206, "data": ["Heading 4"] }, { @@ -425,6 +445,8 @@ }, { "event": "text", + "startIndex": 222, + "endIndex": 224, "data": ["Hi"] }, { @@ -447,6 +469,8 @@ }, { "event": "text", + "startIndex": 228, + "endIndex": 231, "data": ["bye"] }, { diff --git a/src/__fixtures__/Events/09-attributes.json b/src/__fixtures__/Events/09-attributes.json index ac258e974..87bc4febe 100644 --- a/src/__fixtures__/Events/09-attributes.json +++ b/src/__fixtures__/Events/09-attributes.json @@ -10,18 +10,26 @@ }, { "event": "attribute", + "startIndex": 8, + "endIndex": 20, "data": ["class", "test0", "\""] }, { "event": "attribute", + "startIndex": 21, + "endIndex": 33, "data": ["title", "test1", "\""] }, { "event": "attribute", + "startIndex": 35, + "endIndex": 44, "data": ["disabled", ""] }, { "event": "attribute", + "startIndex": 44, + "endIndex": 55, "data": ["value", "test2", null] }, { @@ -40,6 +48,8 @@ }, { "event": "text", + "startIndex": 56, + "endIndex": 60, "data": ["adsf"] }, { diff --git a/src/__fixtures__/Events/10-crazy-attrib.json b/src/__fixtures__/Events/10-crazy-attrib.json index a80879725..41e01ae72 100644 --- a/src/__fixtures__/Events/10-crazy-attrib.json +++ b/src/__fixtures__/Events/10-crazy-attrib.json @@ -10,10 +10,14 @@ }, { "event": "attribute", + "startIndex": 3, + "endIndex": 8, "data": ["<", "", "'"] }, { "event": "attribute", + "startIndex": 10, + "endIndex": 14, "data": ["fail", ""] }, { @@ -30,6 +34,8 @@ }, { "event": "text", + "startIndex": 15, + "endIndex": 20, "data": ["stuff"] }, { diff --git a/src/__fixtures__/Events/11-script_in_script.json b/src/__fixtures__/Events/11-script_in_script.json index f68a64aaa..4702fd661 100644 --- a/src/__fixtures__/Events/11-script_in_script.json +++ b/src/__fixtures__/Events/11-script_in_script.json @@ -28,6 +28,8 @@ }, { "event": "text", + "startIndex": 11, + "endIndex": 44, "data": ["var str = '