Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Fix how indices are computed, add attrib indices #929

Merged
merged 1 commit into from
Aug 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 18 additions & 31 deletions src/Parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,30 +79,6 @@ describe("API", () => {
expect(text).toBe("0&#xn");
});

test("should update the position", () => {
const p = new Parser();

p.write("foo");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(2);

p.write("<select>");

expect(p.startIndex).toBe(3);
expect(p.endIndex).toBe(10);

p.write("<select>");

expect(p.startIndex).toBe(11);
expect(p.endIndex).toBe(18);

p.parseChunk("</select>");

expect(p.startIndex).toBe(19);
expect(p.endIndex).toBe(27);
});

test("should not have the start index be greater than the end index", () => {
const onopentag = jest.fn();
const onclosetag = jest.fn();
Expand Down Expand Up @@ -134,22 +110,33 @@ describe("API", () => {
});

test("should update the position when a single tag is spread across multiple chunks", () => {
const p = new Parser();
let called = false;
const p = new Parser({
onopentag() {
called = true;
expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(12);
},
});

p.write("<div ");
p.write("foo=bar>");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(12);
expect(called).toBe(true);
});

test("should have the correct position for implied opening tags", () => {
const p = new Parser();
let called = false;
const p = new Parser({
onopentag() {
called = true;
expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(3);
},
});

p.write("</p>");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(3);
expect(called).toBe(true);
});

test("should parse <__proto__> (#387)", () => {
Expand Down
91 changes: 70 additions & 21 deletions src/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ export class Parser {
public startIndex = 0;
/** The end index of the last event. */
public endIndex = 0;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
private openTagStart = 0;

private tagname = "";
private attribname = "";
Expand All @@ -212,7 +217,6 @@ export class Parser {
cbs?: Partial<Handler> | null,
private readonly options: ParserOptions = {}
) {
this.options = options;
this.cbs = cbs ?? {};
this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode;
this.lowerCaseAttributeNames =
Expand All @@ -224,24 +228,23 @@ export class Parser {
this.cbs.onparserinit?.(this);
}

private updatePosition(offset: number) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();
}

// Tokenizer event handlers

/** @internal */
ontext(data: string): void {
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
const idx = this.tokenizer.getAbsoluteIndex();
this.endIndex = idx;
this.cbs.ontext?.(data);
this.startIndex = idx;
}

/**
 * Checks whether `name` should be treated as a void element.
 *
 * @param name Tag name to look up.
 * @returns `false` whenever the `xmlMode` option is set; otherwise
 *     whether `voidElements` contains `name`.
 */
protected isVoidElement(name: string): boolean {
    // In XML mode no element is considered void.
    if (this.options.xmlMode) {
        return false;
    }

    return voidElements.has(name);
}

/** @internal */
onopentagname(name: string): void {
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.lowerCaseTagNames) {
name = name.toLowerCase();
Expand All @@ -251,6 +254,7 @@ export class Parser {
}

private emitOpenTag(name: string) {
this.openTagStart = this.startIndex;
this.tagname = name;

const impliesClose =
Expand All @@ -277,7 +281,9 @@ export class Parser {
if (this.cbs.onopentag) this.attribs = {};
}

/** @internal */
onopentagend(): void {
this.startIndex = this.openTagStart;
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.attribs) {
Expand All @@ -287,11 +293,16 @@ export class Parser {
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname);
}

this.tagname = "";
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onclosetag(name: string): void {
this.updatePosition(2);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
Expand Down Expand Up @@ -319,8 +330,12 @@ export class Parser {
this.emitOpenTag(name);
this.closeCurrentTag();
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onselfclosingtag(): void {
if (
this.options.xmlMode ||
Expand All @@ -329,35 +344,44 @@ export class Parser {
) {
this.closeCurrentTag();
} else {
// Ignore the fact that the tag is self-closing.
this.onopentagend();
}
}

/**
 * Finishes the open tag that is currently being parsed and, if that tag
 * is the top entry of the stack, immediately emits its close event.
 */
private closeCurrentTag() {
    // Read the name before calling `onopentagend`, which runs first.
    const name = this.tagname;
    this.onopentagend();

    // Self-closing tags will be on the top of the stack
    const top = this.stack[this.stack.length - 1];
    if (top !== name) {
        return;
    }

    // Reset the start index
    this.startIndex = this.openTagStart;

    this.cbs.onclosetag?.(name);
    this.stack.pop();
}

/**
 * Tokenizer event handler for an attribute name.
 *
 * Sets `startIndex` to the tokenizer's absolute section start and stores
 * the name, lower-cased when `lowerCaseAttributeNames` is enabled.
 *
 * @param name Attribute name as reported by the tokenizer.
 * @internal
 */
onattribname(name: string): void {
    this.startIndex = this.tokenizer.getAbsoluteSectionStart();

    this.attribname = this.lowerCaseAttributeNames
        ? name.toLowerCase()
        : name;
}

/**
 * Tokenizer event handler for a piece of attribute value data; appends
 * it to the value accumulated so far.
 *
 * @param value Chunk of attribute value to append.
 * @internal
 */
onattribdata(value: string): void {
    this.attribvalue = this.attribvalue + value;
}

/** @internal */
onattribend(quote: string | undefined | null): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

this.cbs.onattribute?.(this.attribname, this.attribvalue, quote);
if (
this.attribs &&
Expand All @@ -380,47 +404,70 @@ export class Parser {
return name;
}

/** @internal */
ondeclaration(value: string): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`!${name}`, `!${value}`);
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onprocessinginstruction(value: string): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
oncomment(value: string): void {
this.updatePosition(4);
this.endIndex = this.tokenizer.getAbsoluteIndex();

this.cbs.oncomment?.(value);
this.cbs.oncommentend?.();

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
oncdata(value: string): void {
this.updatePosition(9);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.options.xmlMode || this.options.recognizeCDATA) {
this.cbs.oncdatastart?.();
this.cbs.ontext?.(value);
this.cbs.oncdataend?.();
} else {
this.oncomment(`[CDATA[${value}]]`);
this.cbs.oncomment?.(`[CDATA[${value}]]`);
this.cbs.oncommentend?.();
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/**
 * Forwards an error to the `onerror` callback, if one is registered.
 *
 * @param err The error to report.
 * @internal
 */
onerror(err: Error): void {
    // Call through the handler object so the callback's `this` is unchanged.
    const handlers = this.cbs;
    handlers.onerror?.(err);
}

/** @internal */
onend(): void {
if (this.cbs.onclosetag) {
// Set start- and end indices for remaining tags
this.startIndex = this.endIndex = this.tokenizer.getAbsoluteIndex();
// Set the end index for all remaining tags
this.endIndex = this.startIndex;
for (
let i = this.stack.length;
i > 0;
Expand All @@ -440,6 +487,8 @@ export class Parser {
this.attribname = "";
this.attribs = null;
this.stack = [];
this.startIndex = 0;
this.endIndex = 0;
this.cbs.onparserinit?.(this);
}

Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/01-simple.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
},
{
"event": "attribute",
"startIndex": 4,
"endIndex": 14,
"data": ["class", "test", null]
},
{
Expand All @@ -25,6 +27,8 @@
},
{
"event": "text",
"startIndex": 15,
"endIndex": 19,
"data": ["adsf"]
},
{
Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/02-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
},
{
"event": "attribute",
"startIndex": 11,
"endIndex": 30,
"data": ["type", "text/template", "\""]
},
{
Expand All @@ -37,6 +39,8 @@
},
{
"event": "text",
"startIndex": 32,
"endIndex": 49,
"data": ["<h1>Heading1</h1>"]
},
{
Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/03-lowercase_tags.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
},
{
"event": "attribute",
"startIndex": 4,
"endIndex": 14,
"data": ["class", "test", null]
},
{
Expand All @@ -30,6 +32,8 @@
},
{
"event": "text",
"startIndex": 15,
"endIndex": 19,
"data": ["adsf"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/04-cdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
},
{
"event": "text",
"startIndex": 5,
"endIndex": 41,
"data": [" asdf ><asdf></adsf><> fo"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/05-cdata-special.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
},
{
"event": "text",
"startIndex": 8,
"endIndex": 53,
"data": ["/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/06-leading-lt.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"expected": [
{
"event": "text",
"startIndex": 0,
"endIndex": 3,
"data": [">a>"]
}
]
Expand Down
Loading