Skip to content

Commit

Permalink
Support type guards for the output of the parser (#99)
Browse files Browse the repository at this point in the history
* Support type guards for the output of the parser through the use of a discriminating union

* Fixes #100: Incorrect event type for doctype, and add tests where this was not yet checked
  • Loading branch information
wrnrlr authored Jan 25, 2025
1 parent 09125e4 commit ba08def
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 33 deletions.
4 changes: 4 additions & 0 deletions src/js/__test__/cdada.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ describe('When parsing XML, the SaxWasm', () => {
deepStrictEqual(JSON.parse(JSON.stringify(start)), { line: 0, character: 5 });
deepStrictEqual(JSON.parse(JSON.stringify(end)), { line: 0, character: 84 });
strictEqual(value, 'did you know "x < y" & "z > y"? so I [guess] that means that z > x ');
strictEqual(_event, SaxEventType.Cdata);
});

it('should report cdata (lower case) correctly', () => {
Expand All @@ -42,6 +43,7 @@ describe('When parsing XML, the SaxWasm', () => {
deepStrictEqual(JSON.parse(JSON.stringify(start)), { line: 0, character: 5 });
deepStrictEqual(JSON.parse(JSON.stringify(end)), { line: 0, character: 83 });
strictEqual(value, ' did you know "x < y" & "z > y"? so I guess that means that z > x ');
strictEqual(_event, SaxEventType.Cdata);
});

it('should report cDaTa (mixed case) correctly', () => {
Expand All @@ -50,6 +52,7 @@ describe('When parsing XML, the SaxWasm', () => {
deepStrictEqual(JSON.parse(JSON.stringify(start)), { line: 0, character: 5 });
deepStrictEqual(JSON.parse(JSON.stringify(end)), { line: 0, character: 83 });
strictEqual(value, ' did you know "x < y" & "z > y"? so I guess that means that z > x ');
strictEqual(_event, SaxEventType.Cdata);
});

it("should support empty cdata", () => {
Expand All @@ -66,5 +69,6 @@ describe('When parsing XML, the SaxWasm', () => {
const [empty, something] = _data;
strictEqual(empty.value, "");
strictEqual(something.value, "something");
strictEqual(_event, SaxEventType.Cdata);
});
});
4 changes: 3 additions & 1 deletion src/js/__test__/comment.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const saxWasm = readFileSync(resolve(__dirname, '../../../lib/sax-wasm.wasm'));

describe('SaxWasm', () => {
let parser: SAXParser;
let _event: SaxEventType;
let _event: SaxEventType|undefined;
let _data: (Attribute & Text & Tag)[];

beforeAll(async () => {
Expand Down Expand Up @@ -60,11 +60,13 @@ describe('SaxWasm', () => {
parser.write(Buffer.from(`<!--name="test 3 attr" some comment--> <!-- name="test 3 attr" some comment -->`));
strictEqual(_data[0].value, 'name="test 3 attr" some comment');
strictEqual(_data[1].value, ' name="test 3 attr" some comment ');
strictEqual(_event, SaxEventType.Comment);
});

it ('should allow for chars that look like comment endings but are not really endings', () => {
parser.events = SaxEventType.Comment;
parser.write(Buffer.from(`<!--name="test 3 attr" some comment -- > not an ending-->`));
strictEqual(_data[0].value, 'name="test 3 attr" some comment -- > not an ending');
strictEqual(_event, SaxEventType.Comment);
});
});
3 changes: 3 additions & 0 deletions src/js/__test__/doctype.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ describe('When parsing XML, the SaxWasm', () => {
deepEqual(start, { line: 0, character: 0 });
deepEqual(end, { line: 0, character: 15 });
strictEqual(value, 'html');
strictEqual(_event, SaxEventType.Doctype);
});

it('should report doctype (lower case) correctly', () => {
Expand All @@ -42,6 +43,7 @@ describe('When parsing XML, the SaxWasm', () => {
deepEqual(start, { line: 0, character: 0 });
deepEqual(end, { line: 0, character: 15 });
strictEqual(value, 'html');
strictEqual(_event, SaxEventType.Doctype);
});

it('should report DocType (mixed case) correctly', () => {
Expand All @@ -50,5 +52,6 @@ describe('When parsing XML, the SaxWasm', () => {
deepEqual(start, { line: 0, character: 0 });
deepEqual(end, { line: 0, character: 15 });
strictEqual(value, 'html');
strictEqual(_event, SaxEventType.Doctype);
});
});
5 changes: 4 additions & 1 deletion src/js/__test__/text.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { readFileSync } from 'fs';
import { resolve } from 'path';
import { deepStrictEqual } from 'assert';
import { deepStrictEqual, strictEqual } from 'assert';
import { Detail, Reader, SaxEventType, SAXParser } from '../saxWasm';

const saxWasm = readFileSync(resolve(__dirname, '../../../lib/sax-wasm.wasm'));
Expand All @@ -27,6 +27,7 @@ describe('SaxWasm', () => {
parser.write(Buffer.from('this is just plain text <br>'));
parser.end();
deepStrictEqual(_data[0].value,'this is just plain text ');
strictEqual(_event, SaxEventType.Text);
});

it('should report multiple text blocks when child nodes exist between them', () => {
Expand All @@ -36,6 +37,7 @@ describe('SaxWasm', () => {
deepStrictEqual(_data[0].value,'I like to use ');
deepStrictEqual(_data[1].value,'bold text');
deepStrictEqual(_data[2].value,' to emphasize');
strictEqual(_event, SaxEventType.Text);
});

it('should not capture empty white space between tags', () => {
Expand All @@ -53,5 +55,6 @@ describe('SaxWasm', () => {
parser.write(Buffer.from('a happy little parser'));
parser.end();
deepStrictEqual(JSON.stringify(_data[0]),'{"start":{"line":0,"character":0},"end":{"line":0,"character":21},"value":"a happy little parser"}');
strictEqual(_event, SaxEventType.Text);
});
});
64 changes: 34 additions & 30 deletions src/js/saxWasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,32 @@
* slight performance improvement which becomes more noticeable
* on very large documents.
*/
export enum SaxEventType {
// 1
Text = 0b1,
// 2
ProcessingInstruction = 0b10,
// 4
Declaration = 0b100,
// 8
Doctype = 0b1000,
// 16
Comment = 0b10000,
// 32
OpenTagStart = 0b100000,
// 64
Attribute = 0b1000000,
// 128
OpenTag = 0b10000000,
// 256
CloseTag = 0b100000000,
// 512
Cdata = 0b1000000000,
}
/**
 * Numeric identifiers for every kind of event the parser can report.
 *
 * Each member has exactly one distinct bit set, so individual values can be
 * combined with bitwise OR into a single numeric mask. The object is declared
 * `as const` so each member keeps its literal type.
 */
export const SaxEventType = {
  Text:                  0b0000000001, //   1
  ProcessingInstruction: 0b0000000010, //   2
  Declaration:           0b0000000100, //   4
  Doctype:               0b0000001000, //   8
  Comment:               0b0000010000, //  16
  OpenTagStart:          0b0000100000, //  32
  Attribute:             0b0001000000, //  64
  OpenTag:               0b0010000000, // 128
  CloseTag:              0b0100000000, // 256
  Cdata:                 0b1000000000, // 512
} as const;

/** Union of the numeric literal values stored in the `SaxEventType` object. */
export type SaxEventType = (typeof SaxEventType)[keyof typeof SaxEventType];

/**
 * A parser event expressed as a `[type, detail]` tuple.
 *
 * Every variant pairs one `SaxEventType` literal with the detail class
 * delivered for it, so the union is discriminated by its first element:
 * checking element 0 against a `SaxEventType` member selects the matching
 * detail type for element 1.
 */
export type SaxEvent =
  | [typeof SaxEventType.Text, Text]
  | [typeof SaxEventType.ProcessingInstruction, ProcInst]
  | [typeof SaxEventType.Declaration, Text]
  | [typeof SaxEventType.Doctype, Text]
  | [typeof SaxEventType.Comment, Text]
  | [typeof SaxEventType.OpenTagStart, Tag]
  | [typeof SaxEventType.Attribute, Attribute]
  | [typeof SaxEventType.OpenTag, Tag]
  | [typeof SaxEventType.CloseTag, Tag]
  | [typeof SaxEventType.Cdata, Text];

export type AttributeDetail = {
readonly type: 0 | 1;
readonly name: TextDetail;
Expand Down Expand Up @@ -488,7 +492,7 @@ export class SAXParser {
public events?: number;
public wasmSaxParser?: WasmSaxParser;

public eventHandler?: (type: SaxEventType, detail: Reader<Detail>) => void;
public eventHandler?: <T extends SaxEvent>(type: T[0], detail: T[1]) => void;

private createDetailConstructor<T extends { new(...args: unknown[]): {}; LENGTH: number }>(Constructor: T) {
return (memoryBuffer: ArrayBuffer, ptr: number): Reader<Detail> => {
Expand Down Expand Up @@ -579,10 +583,10 @@ export class SAXParser {
* })();
* ```
*/
public async *parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Reader<Detail>]> {
let eventAggregator: [SaxEventType, Reader<Detail>][] | null = [];
this.eventHandler = function (event, detail) {
eventAggregator.push([event, detail]);
public async *parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<SaxEvent> {
let eventAggregator: SaxEvent[] = [];
this.eventHandler = function <T extends SaxEvent> (event:T[0], detail:T[1]) {
eventAggregator.push([event, detail] as T);
};

while (true) {
Expand Down Expand Up @@ -747,16 +751,16 @@ export class SAXParser {
throw new Error(`Failed to instantiate the parser.`);
}

public eventTrap = (event: number, ptr: number): void => {
public eventTrap = (event: SaxEventType, ptr: number): void => {
if (!this.wasmSaxParser || !this.eventHandler) {
return;
}
const memoryBuffer = this.wasmSaxParser.memory.buffer;
let detail: Reader<Detail>;
let detail: Attribute | Text | Tag | ProcInst;

const constructor = this.eventToDetailConstructor.get(event);
if (constructor) {
detail = constructor(memoryBuffer, ptr);
detail = constructor(memoryBuffer, ptr) as Attribute | Text | Tag | ProcInst
} else {
throw new Error("No reader for this event type");
}
Expand Down
2 changes: 1 addition & 1 deletion src/sax/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ impl<'a> SAXParser<'a> {
if self.events[Event::Doctype] && markup_decl.hydrate(self.source_ptr) {
markup_decl.value.truncate(markup_decl.value.len() - 1); // remove '>' or '['

self.event_handler.handle_event(Event::Cdata, Entity::Text(&markup_decl));
self.event_handler.handle_event(Event::Doctype, Entity::Text(&markup_decl));
self.dispatched.push(Dispatched::Text(markup_decl));
}
self.state = State::BeginWhitespace;
Expand Down

0 comments on commit ba08def

Please sign in to comment.