Skip to content

Commit

Permalink
feat(parser): support Basic DocType Declarations
Browse files Browse the repository at this point in the history
This is not meant to support the full DTD syntax, only the part of the declaration that references an external DTD.

BREAKING CHANGE: Implementing XmlCstVisitor now requires implementing two additional methods:
(docTypeDecl and externalID)
  • Loading branch information
bd82 committed Jan 20, 2020
1 parent 582f8e3 commit 5b4db21
Show file tree
Hide file tree
Showing 11 changed files with 411 additions and 3 deletions.
12 changes: 12 additions & 0 deletions packages/ast/lib/build-ast.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
return astNode;
}

/**
* Placeholder visitor method for the `docTypeDecl` CST node.
* The body is intentionally empty: nothing is read from `ctx` and nothing
* is written to `astNode`.
*
* @param {DocTypeDeclCtx} ctx - children of the `docTypeDecl` CST node (unused).
* @param astNode - unused by this placeholder.
*/
/* istanbul ignore next - place holder*/
docTypeDecl(ctx, astNode) {}

/**
* Placeholder visitor method for the `externalID` CST node.
* The body is intentionally empty: nothing is read from `ctx` and nothing
* is written to `astNode`.
*
* @param {ExternalIDCtx} ctx - children of the `externalID` CST node (unused).
* @param astNode - unused by this placeholder.
*/
/* istanbul ignore next - place holder*/
externalID(ctx, astNode) {}

/**
* @param ctx {ContentCtx}
* @param location {SourcePosition}
Expand Down
12 changes: 12 additions & 0 deletions packages/content-assist/lib/content-assist.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ class SuggestionContextVisitor extends BaseXmlCstVisitor {
/* istanbul ignore next - place holder*/
prolog(ctx, astNode) {}

/**
* Placeholder visitor method for the `docTypeDecl` CST node.
* The body is intentionally empty, so visiting a DOCTYPE declaration has no effect here.
*
* @param {DocTypeDeclCtx} ctx - children of the `docTypeDecl` CST node (unused).
* @param astNode - unused by this placeholder.
*/
/* istanbul ignore next - place holder*/
docTypeDecl(ctx, astNode) {}

/**
* Placeholder visitor method for the `externalID` CST node.
* The body is intentionally empty, so visiting an external ID has no effect here.
*
* @param {ExternalIDCtx} ctx - children of the `externalID` CST node (unused).
* @param astNode - unused by this placeholder.
*/
/* istanbul ignore next - place holder*/
externalID(ctx, astNode) {}

/**
* @param {ContentCtx} ctx
* @param {XMLElement} astNode
Expand Down
30 changes: 30 additions & 0 deletions packages/parser/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export abstract class XmlCstVisitor<IN, OUT> implements ICstVisitor<IN, OUT> {

document(ctx: DocumentCtx, param?: IN): OUT;
prolog(ctx: PrologCtx, param?: IN): OUT;
docTypeDecl(ctx: DocTypeDeclCtx, param?: IN): OUT;
externalID(ctx: ExternalIDCtx, param?: IN): OUT;
content(ctx: ContentCtx, param?: IN): OUT;
element(ctx: ElementCtx, param?: IN): OUT;
reference(ctx: ReferenceCtx, param?: IN): OUT;
Expand All @@ -52,6 +54,8 @@ export abstract class XmlCstVisitorWithDefaults<IN, OUT>

document(ctx: DocumentCtx, param?: IN): OUT;
prolog(ctx: PrologCtx, param?: IN): OUT;
docTypeDecl(ctx: DocTypeDeclCtx, param?: IN): OUT;
externalID(ctx: ExternalIDCtx, param?: IN): OUT;
content(ctx: ContentCtx, param?: IN): OUT;
element(ctx: ElementCtx, param?: IN): OUT;
reference(ctx: ReferenceCtx, param?: IN): OUT;
Expand All @@ -70,6 +74,7 @@ export interface DocumentCstNode extends CstNode {
}
/**
 * Children of the top-level `document` CST node, keyed by sub-rule name.
 *
 * NOTE(review): every key is declared as required, but the parser's snapshot
 * output only contains keys for constructs that actually appeared in the input
 * (e.g. no `prolog` key for a document without an XML declaration), so
 * consumers should presumably guard against `undefined` — confirm.
 */
export type DocumentCtx = {
prolog: PrologCstNode[];
docTypeDecl: DocTypeDeclNode[];
misc: MiscCstNode[];
element: ElementCstNode[];
};
Expand All @@ -78,16 +83,41 @@ export interface PrologCstNode extends CstNode {
name: "prolog";
children: PrologCtx;
}

/**
 * Children of the `prolog` CST node: the opening token of the XML declaration,
 * its attributes and the closing `SPECIAL_CLOSE` token.
 */
export type PrologCtx = {
XMLDeclOpen: IToken[];
attribute: AttributeCstNode[];
SPECIAL_CLOSE: IToken[];
};

/**
 * CST node produced by the `docTypeDecl` parser rule
 * (a basic `<!DOCTYPE ...>` declaration).
 */
export interface DocTypeDeclNode extends CstNode {
name: "docTypeDecl";
children: DocTypeDeclCtx;
}

/**
 * Children of the `docTypeDecl` CST node, in the order the parser consumes them:
 * `<!DOCTYPE`, the document type name, an optional external ID and the closing `>`.
 */
export type DocTypeDeclCtx = {
  /** The `<!DOCTYPE` opening token. */
  DocType: IToken[];
  /** The declared document type name. */
  Name: IToken[];
  /** Reference to an external DTD; the parser treats it as optional. */
  externalID: ExternalIDNode[];
  /** The `>` token that terminates the declaration. */
  CLOSE: IToken[];
};

/**
 * CST node produced by the `externalID` parser rule.
 *
 * The `name` literal is the parser rule name (`"externalID"`), which is what
 * the parser actually emits on the CST node — not the name of this interface.
 */
export interface ExternalIDNode extends CstNode {
  name: "externalID";
  children: ExternalIDCtx;
}

/**
 * Children of the `externalID` CST node.
 *
 * The parser rule has two alternatives, so only one keyword label is populated
 * for a given node:
 * - `SYSTEM` form: `System` + `SystemLiteral`.
 * - `PUBLIC` form: `Public` + `PubIDLiteral` + `SystemLiteral`.
 *
 * The `System` / `Public` tokens are `Name` tokens whose image is the keyword.
 */
export type ExternalIDCtx = {
System: IToken[];
Public: IToken[];
PubIDLiteral: IToken[];
SystemLiteral: IToken[];
};

/**
 * CST node produced by the `content` parser rule.
 */
export interface ContentCstNode extends CstNode {
name: "content";
children: ContentCtx;
}

export type ContentCtx = {
chardata: ChardataCstNode[];
element: ElementCstNode[];
Expand Down
11 changes: 9 additions & 2 deletions packages/parser/lib/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ const Comment = createToken({

// A CDATA section may span several lines. "." does not match line terminators,
// so the section body is matched with [\s\S] instead, and the token is flagged
// as possibly containing line breaks so the lexer keeps line/column tracking correct.
const CData = createToken({
  name: "CData",
  pattern: /<!\[CDATA\[[\s\S]*?]]>/,
  line_breaks: true
});

const DTD = createToken({
// Opening of a DOCTYPE declaration ("<!DOCTYPE", case-sensitive).
// Matching it pushes the "INSIDE" lexer mode.
// NOTE(review): presumably this is so the rest of the declaration is lexed as
// Name / STRING / CLOSE tokens, as seen in the parser snapshots — confirm
// against the "INSIDE" mode definition.
const DocType = createToken({
name: "DocType",
pattern: /<!DOCTYPE/,
push_mode: "INSIDE"
});

const IgnoredDTD = createToken({
name: "DTD",
pattern: /<!.*?>/,
group: Lexer.SKIPPED
Expand Down Expand Up @@ -149,7 +155,8 @@ const xmlLexerDefinition = {
OUTSIDE: [
Comment,
CData,
DTD,
DocType,
IgnoredDTD,
EntityRef,
CharRef,
SEA_WS,
Expand Down
49 changes: 48 additions & 1 deletion packages/parser/lib/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,19 @@ class Parser extends CstParser {
$.SUBRULE($.misc);
});

$.SUBRULE($.element);
$.OPTION2(() => {
$.SUBRULE($.docTypeDecl);
});

$.MANY2(() => {
$.SUBRULE2($.misc);
});

$.SUBRULE($.element);

$.MANY3(() => {
$.SUBRULE3($.misc);
});
});

$.RULE("prolog", () => {
Expand All @@ -37,6 +45,45 @@ class Parser extends CstParser {
$.CONSUME(t.SPECIAL_CLOSE);
});

// Grammar rule for a basic document type declaration:
//   "<!DOCTYPE" Name externalID? ">"
// https://www.w3.org/TR/xml/#NT-doctypedecl
$.RULE("docTypeDecl", () => {
$.CONSUME(t.DocType);
$.CONSUME(t.Name);

// The reference to an external DTD is optional.
$.OPTION(() => {
$.SUBRULE($.externalID);
});

// The internal subset part is intentionally not implemented because we do not, at this
// time, wish to implement a full DTD parser as part of this project...
// https://www.w3.org/TR/xml/#NT-intSubset

$.CONSUME(t.CLOSE);
});

// Grammar rule for the external identifier of a DOCTYPE declaration:
//   "SYSTEM" SystemLiteral
//   | "PUBLIC" PubIDLiteral SystemLiteral
$.RULE("externalID", () => {
// "SYSTEM" and "PUBLIC" are consumed as plain "Name" tokens, so gates are used to
// assert the image of the upcoming "Name" token and pick the matching alternative.
// We could use Categories to model un-reserved keywords; however, I am not sure
// the added complexity is needed at this time...
$.OR([
{
GATE: () => $.LA(1).image === "SYSTEM",
ALT: () => {
$.CONSUME2(t.Name, { LABEL: "System" });
$.CONSUME(t.STRING, { LABEL: "SystemLiteral" });
}
},
{
GATE: () => $.LA(1).image === "PUBLIC",
ALT: () => {
$.CONSUME3(t.Name, { LABEL: "Public" });
$.CONSUME2(t.STRING, { LABEL: "PubIDLiteral" });
$.CONSUME3(t.STRING, { LABEL: "SystemLiteral" });
}
}
]);
});

$.RULE("content", () => {
$.MANY(() => {
$.OR([
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd">
<note></note>
145 changes: 145 additions & 0 deletions packages/parser/test/snapshots/valid/doc-type-public/output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
module.exports = {
cst: {
name: "document",
children: {
docTypeDecl: [
{
name: "docTypeDecl",
children: {
DocType: [
{
image: "<!DOCTYPE",
startOffset: 0,
endOffset: 8,
tokenType: "DocType"
}
],
Name: [
{
image: "task",
startOffset: 10,
endOffset: 13,
tokenType: "Name"
}
],
externalID: [
{
name: "externalID",
children: {
Public: [
{
image: "PUBLIC",
startOffset: 15,
endOffset: 20,
tokenType: "Name"
}
],
PubIDLiteral: [
{
image: '"-//OASIS//DTD DITA Task//EN"',
startOffset: 22,
endOffset: 50,
tokenType: "STRING"
}
],
SystemLiteral: [
{
image: '"task.dtd"',
startOffset: 52,
endOffset: 61,
tokenType: "STRING"
}
]
},
location: { startOffset: 15, endOffset: 61 }
}
],
CLOSE: [
{ image: ">", startOffset: 62, endOffset: 62, tokenType: "CLOSE" }
]
},
location: { startOffset: 0, endOffset: 62 }
}
],
misc: [
{
name: "misc",
children: {
SEA_WS: [
{
image: "\n",
startOffset: 63,
endOffset: 63,
tokenType: "SEA_WS"
}
]
},
location: { startOffset: 63, endOffset: 63 }
},
{
name: "misc",
children: {
SEA_WS: [
{
image: "\n",
startOffset: 77,
endOffset: 77,
tokenType: "SEA_WS"
}
]
},
location: { startOffset: 77, endOffset: 77 }
}
],
element: [
{
name: "element",
children: {
OPEN: [
{ image: "<", startOffset: 64, endOffset: 64, tokenType: "OPEN" }
],
Name: [
{
image: "note",
startOffset: 65,
endOffset: 68,
tokenType: "Name"
}
],
START_CLOSE: [
{ image: ">", startOffset: 69, endOffset: 69, tokenType: "CLOSE" }
],
content: [
{
name: "content",
children: {},
location: { startOffset: null, endOffset: null }
}
],
SLASH_OPEN: [
{
image: "</",
startOffset: 70,
endOffset: 71,
tokenType: "SLASH_OPEN"
}
],
END_NAME: [
{
image: "note",
startOffset: 72,
endOffset: 75,
tokenType: "Name"
}
],
END: [
{ image: ">", startOffset: 76, endOffset: 76, tokenType: "CLOSE" }
]
},
location: { startOffset: 64, endOffset: 76 }
}
]
},
location: { startOffset: 0, endOffset: 77 }
}
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
const {
executeValidSampleTest,
testNameFromDir
} = require("../../../sample-test");
// Registers a test suite named after this sample's directory and hands the
// directory to the shared valid-sample test helper.
// NOTE(review): presumably the helper parses the sample input in this directory
// and compares the result with the stored snapshot — confirm in sample-test.
describe(`${testNameFromDir(__dirname)}`, () => {
executeValidSampleTest(__dirname);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!DOCTYPE greeting SYSTEM "hello.dtd">
<note></note>
Loading

0 comments on commit 5b4db21

Please sign in to comment.