Skip to content

Commit

Permalink
fix(tokenizer): Decode entities after < (#1008)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored Nov 11, 2021
1 parent 6a1b54d commit daa0281
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 36 deletions.
62 changes: 26 additions & 36 deletions src/Tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ const enum CharCodes {
Eq = 0x3d, // "="
Gt = 0x3e, // ">"
Questionmark = 0x3f, // "?"
LowerC = 0x63, // "c"
LowerS = 0x73, // "s"
LowerT = 0x74, // "t"
UpperA = 0x41, // "A"
LowerA = 0x61, // "a"
UpperF = 0x46, // "F"
Expand All @@ -37,7 +34,6 @@ const enum CharCodes {
LowerZ = 0x7a, // "z"
LowerX = 0x78, // "x"
OpeningSquareBracket = 0x5b, // "["
ClosingSquareBracket = 0x5d, // "]"
}

/** All the states the tokenizer can be in. */
Expand Down Expand Up @@ -402,38 +398,39 @@ export default class Tokenizer {
* We allow anything that wouldn't end the tag.
*/
private isTagStartChar(c: number) {
return isASCIIAlpha(c) || (this.xmlMode && !isEndOfTagSection(c));
return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c);
}

private startSpecial(sequence: Uint16Array, offset: number) {
this.isSpecial = true;
this.currentSequence = sequence;
this.sequenceIndex = offset;
this._state = State.SpecialStartSequence;
}

private stateBeforeTagName(c: number) {
if (c === CharCodes.Slash) {
this._state = State.BeforeClosingTagName;
} else if (c === CharCodes.Lt) {
this.cbs.ontext(this.getSection());
this.sectionStart = this._index;
} else if (c === CharCodes.Gt || isWhitespace(c)) {
this._state = State.Text;
} else if (c === CharCodes.ExclamationMark) {
if (c === CharCodes.ExclamationMark) {
this._state = State.BeforeDeclaration;
this.sectionStart = this._index + 1;
} else if (c === CharCodes.Questionmark) {
this._state = State.InProcessingInstruction;
this.sectionStart = this._index + 1;
} else if (!this.isTagStartChar(c)) {
this._state = State.Text;
} else {
} else if (this.isTagStartChar(c)) {
const lower = c | 0x20;
this.sectionStart = this._index;
if (!this.xmlMode && lower === CharCodes.LowerT) {
this.isSpecial = true;
this.currentSequence = Sequences.TitleEnd;
this.sequenceIndex = 3;
this._state = State.SpecialStartSequence;
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3);
} else {
this._state =
!this.xmlMode && lower === CharCodes.LowerS
!this.xmlMode && lower === Sequences.ScriptEnd[2]
? State.BeforeSpecialS
: State.InTagName;
}
} else if (c === CharCodes.Slash) {
this._state = State.BeforeClosingTagName;
} else {
this._state = State.Text;
this.stateText(c);
}
}
private stateInTagName(c: number) {
Expand All @@ -449,11 +446,10 @@ export default class Tokenizer {
// Ignore
} else if (c === CharCodes.Gt) {
this._state = State.Text;
} else if (!this.isTagStartChar(c)) {
this._state = State.InSpecialComment;
this.sectionStart = this._index;
} else {
this._state = State.InClosingTagName;
this._state = this.isTagStartChar(c)
? State.InClosingTagName
: State.InSpecialComment;
this.sectionStart = this._index;
}
}
Expand Down Expand Up @@ -617,16 +613,10 @@ export default class Tokenizer {
}
private stateBeforeSpecialS(c: number) {
const lower = c | 0x20;
if (lower === CharCodes.LowerC) {
this.isSpecial = true;
this.currentSequence = Sequences.ScriptEnd;
this.sequenceIndex = 4;
this._state = State.SpecialStartSequence;
} else if (lower === CharCodes.LowerT) {
this.isSpecial = true;
this.currentSequence = Sequences.StyleEnd;
this.sequenceIndex = 4;
this._state = State.SpecialStartSequence;
if (lower === Sequences.ScriptEnd[3]) {
this.startSpecial(Sequences.ScriptEnd, 4);
} else if (lower === Sequences.StyleEnd[3]) {
this.startSpecial(Sequences.StyleEnd, 4);
} else {
this._state = State.InTagName;
this.stateInTagName(c); // Consume the token again
Expand Down
13 changes: 13 additions & 0 deletions src/__fixtures__/Events/46-entity-after-lt.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"name": "Entity after <",
"options": {},
"input": "<&amp;",
"expected": [
{
"event": "text",
"startIndex": 0,
"endIndex": 5,
"data": ["<&"]
}
]
}

0 comments on commit daa0281

Please sign in to comment.