Skip to content

Commit 8ce840d

Browse files
committed
feat(ast): attributesRange and guessedAttributeRange support
Details are in the api.d.ts changes in this commit. BREAKING CHANGE: buildAst now requires a tokenVector argument. Fixes #51.
1 parent 61cdcb9 commit 8ce840d

File tree

32 files changed

+252
-130
lines changed

32 files changed

+252
-130
lines changed

packages/ast/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ const xmlText = `<note>
4242
</note>
4343
`;
4444

45-
const { cst } = parse(xmlText);
46-
const xmlDocAst = buildAst(cst);
45+
const { cst, tokenVector } = parse(xmlText);
46+
const xmlDocAst = buildAst(cst, tokenVector);
4747
console.log(xmlDocAst.rootElement.name); // -> note
4848

4949
// A Visitor allows us to invoke actions on the XML ASTNodes without worrying about

packages/ast/api.d.ts

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
import { IToken } from "chevrotain";
12
import { DocumentCstNode } from "@xml-tools/parser";
23

34
/**
45
* Builds an XML Ast from an XML Cst.
56
* Can process even partial CSTs...
6-
*
7-
* @param docCst
87
*/
9-
export function buildAst(docCst: DocumentCstNode): XMLDocument;
8+
export function buildAst(
9+
docCst: DocumentCstNode,
10+
tokenVector: IToken[]
11+
): XMLDocument;
1012

1113
/**
1214
* An Abstract Syntax Tree structure
@@ -85,6 +87,23 @@ declare interface XMLElement {
8587
// Will not exist if any of the closing "brackets" are missing
8688
// - e.g in a self closing element.
8789
readonly closeBody?: XMLToken;
90+
91+
// Describes the range of the attributes section:
92+
// This starts one character **after** the opening name token
93+
// and ends one character before the open body closing '>' or self closing '/>'
94+
// Examples:
95+
// <SomeTag attribute1="value1" attrib2="666" >Some content</SomeTag>
96+
// <===== attributesRange =====>
97+
// <SomeTag attribute1="value1" attrib2="666" />
98+
// <===== attributesRange =====>
99+
readonly attributesRange?: SourceRange;
100+
101+
// Same as attributesRange except this property will be used for partially valid
102+
// XMLElements when the AstBuilder cannot be certain what the exact attributes range is.
103+
// - Only one of the attributeRanges properties may exist at the same time.
104+
// - It is possible that in some cases it won't be possible to even guess the attributes range.
105+
// In that scenario neither of the properties will exist.
106+
readonly guessedAttributesRange?: SourceRange;
88107
};
89108
readonly position: SourcePosition;
90109
}
@@ -114,6 +133,11 @@ declare interface XMLAttribute {
114133
readonly position: SourcePosition;
115134
}
116135

136+
/**
 * A span of the source text described by a pair of character offsets.
 *
 * Used for ranges that are not tied to a single token, such as
 * `attributesRange` / `guessedAttributesRange` on an XMLElement's syntax.
 *
 * NOTE: do not assume `startOffset <= endOffset`. For an element without any
 * attributes section (e.g. `<top>`) the builder's snapshots contain ranges
 * such as `{ startOffset: 5, endOffset: 3 }`.
 */
declare interface SourceRange {
  /** Offset at which the range begins. */
  readonly startOffset: number;
  /** Offset at which the range ends. */
  readonly endOffset: number;
}
140+
117141
declare interface SourcePosition {
118142
readonly startOffset: number;
119143
readonly endOffset: number;

packages/ast/lib/build-ast.js

Lines changed: 105 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
const { BaseXmlCstVisitor } = require("@xml-tools/parser");
22
const {
3+
last,
34
forEach,
45
reduce,
56
map,
@@ -8,14 +9,16 @@ const {
89
isEmpty,
910
isArray
1011
} = require("lodash");
11-
12+
const { findNextTextualToken } = require("@xml-tools/common");
1213
const { getAstChildrenReflective } = require("./utils");
1314

1415
/**
1516
* @param {DocumentCstNode} docCst
17+
* @param {IToken[]} tokenVector
1618
* @returns {XMLDocument}
1719
*/
18-
function buildAst(docCst) {
20+
function buildAst(docCst, tokenVector) {
21+
AstBuilder.setState({ tokenVector });
1922
const xmlDocAst = AstBuilder.visit(docCst);
2023

2124
if (xmlDocAst.rootElement !== invalidSyntax) {
@@ -29,6 +32,10 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
2932
super();
3033
}
3134

35+
setState({ tokenVector }) {
36+
this.tokenVector = tokenVector;
37+
}
38+
3239
visit(cstNode) {
3340
return super.visit(cstNode, cstNode.location);
3441
}
@@ -104,7 +111,7 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
104111
}
105112

106113
/**
107-
* @param ctx {ElementCstNode}
114+
* @param ctx {ElementCtx}
108115
* @param location {SourcePosition}
109116
*/
110117
element(ctx, location) {
@@ -129,49 +136,10 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
129136
astNode.textContents = textContents;
130137
}
131138

132-
if (ctx.Name !== undefined && ctx.Name[0].isInsertedInRecovery !== true) {
133-
const openNameToken = ctx.Name[0];
134-
astNode.syntax.openName = toXMLToken(openNameToken);
135-
const nsParts = nsToParts(openNameToken.image);
136-
if (nsParts !== null) {
137-
astNode.ns = nsParts.ns;
138-
astNode.name = nsParts.name;
139-
} else {
140-
astNode.name = openNameToken.image;
141-
}
142-
}
139+
handleElementOpenCloseNameRanges(astNode, ctx);
140+
handleElementOpenCloseBodyRanges(astNode, ctx);
141+
handleElementAttributeRanges(astNode, ctx, this.tokenVector);
143142

144-
if (
145-
ctx.END_NAME !== undefined &&
146-
ctx.END_NAME[0].isInsertedInRecovery !== true
147-
) {
148-
astNode.syntax.closeName = toXMLToken(ctx.END_NAME[0]);
149-
}
150-
151-
/* istanbul ignore else - Defensive Coding */
152-
if (exists(ctx.OPEN)) {
153-
let openBodyCloseTok = undefined;
154-
/* istanbul ignore else - Defensive Coding */
155-
if (exists(ctx.START_CLOSE)) {
156-
openBodyCloseTok = ctx.START_CLOSE[0];
157-
} else if (exists(ctx.SLASH_CLOSE)) {
158-
openBodyCloseTok = ctx.SLASH_CLOSE[0];
159-
}
160-
161-
if (openBodyCloseTok !== undefined) {
162-
astNode.syntax.openBody = {
163-
...startOfXMLToken(ctx.OPEN[0]),
164-
...endOfXMLToken(openBodyCloseTok)
165-
};
166-
}
167-
}
168-
169-
if (exists(ctx.SLASH_OPEN) && exists(ctx.END)) {
170-
astNode.syntax.closeBody = {
171-
...startOfXMLToken(ctx.SLASH_OPEN[0]),
172-
...endOfXMLToken(ctx.END[0])
173-
};
174-
}
175143
setChildrenParent(astNode);
176144

177145
return astNode;
@@ -299,7 +267,6 @@ function updateNamespaces(element, prevNamespaces = []) {
299267
}
300268

301269
/**
302-
*
303270
* @param {chevrotain.IToken} token
304271
*/
305272
function toXMLToken(token) {
@@ -352,6 +319,98 @@ function nsToParts(text) {
352319
*/
353320
const invalidSyntax = null;
354321

322+
/**
 * Populates the name related information of an element AST node:
 * - `syntax.openName` plus `name` (and `ns` when the name has a namespace part)
 *   from the opening `Name` token.
 * - `syntax.closeName` from the `END_NAME` token.
 *
 * Tokens flagged with `isInsertedInRecovery` are skipped, so the matching
 * properties are simply left unset on the AST node.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 */
function handleElementOpenCloseNameRanges(astNode, ctx) {
  // A token is usable when it is present in the CST and was not inserted in recovery.
  const isUsable = tokens =>
    tokens !== undefined && tokens[0].isInsertedInRecovery !== true;

  if (isUsable(ctx.Name)) {
    const nameTok = ctx.Name[0];
    astNode.syntax.openName = toXMLToken(nameTok);

    // `nsToParts` yields null when the name cannot be split into ns + name.
    const parts = nsToParts(nameTok.image);
    if (parts === null) {
      astNode.name = nameTok.image;
    } else {
      astNode.ns = parts.ns;
      astNode.name = parts.name;
    }
  }

  if (isUsable(ctx.END_NAME)) {
    astNode.syntax.closeName = toXMLToken(ctx.END_NAME[0]);
  }
}
346+
347+
/**
 * Populates `syntax.openBody` and `syntax.closeBody` on an element AST node.
 *
 * - `openBody` spans from the `OPEN` token to the `START_CLOSE` token, or to
 *   the `SLASH_CLOSE` token when there is no `START_CLOSE`.
 * - `closeBody` spans from the `SLASH_OPEN` token to the `END` token.
 *
 * Nothing is set when the `OPEN` token is missing; each property is also
 * left unset when the tokens delimiting it are missing.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 */
function handleElementOpenCloseBodyRanges(astNode, ctx) {
  // Merges the "start" part of one token with the "end" part of another.
  const rangeBetween = (firstTok, lastTok) => ({
    ...startOfXMLToken(firstTok),
    ...endOfXMLToken(lastTok)
  });

  /* istanbul ignore else - Defensive Coding */
  if (exists(ctx.OPEN)) {
    let bodyEndTok;
    /* istanbul ignore else - Defensive Coding */
    if (exists(ctx.START_CLOSE)) {
      bodyEndTok = ctx.START_CLOSE[0];
    } else if (exists(ctx.SLASH_CLOSE)) {
      bodyEndTok = ctx.SLASH_CLOSE[0];
    }

    if (bodyEndTok !== undefined) {
      astNode.syntax.openBody = rangeBetween(ctx.OPEN[0], bodyEndTok);
    }

    const hasClosingTag = exists(ctx.SLASH_OPEN) && exists(ctx.END);
    if (hasClosingTag) {
      astNode.syntax.closeBody = rangeBetween(ctx.SLASH_OPEN[0], ctx.END[0]);
    }
  }
}
377+
378+
/**
 * Populates either `syntax.attributesRange` or `syntax.guessedAttributesRange`
 * on an element AST node (never both).
 *
 * - When the open body is properly closed (`START_CLOSE` or `SLASH_CLOSE`
 *   exists) the exact `attributesRange` is computed.
 * - Otherwise the end of the range is guessed by scanning the token vector
 *   for the next textual token after the last known attribute (or after the
 *   element's name when there are no attributes).
 * - When there is no usable opening name token, or no following token can be
 *   found, neither property is set.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 * @param {IToken[]} tokenVector
 */
function handleElementAttributeRanges(astNode, ctx, tokenVector) {
  // Same guard as `handleElementOpenCloseNameRanges`: a Name token inserted
  // during error recovery has no real position in the text, so offsets
  // derived from it would be meaningless (NaN).
  if (!exists(ctx.Name) || ctx.Name[0].isInsertedInRecovery === true) {
    return;
  }

  const openNameTok = ctx.Name[0];
  // `endOffset` points at the last character of the name, so `+ 2` lands one
  // character **after** the name token (skipping the separator that follows it).
  const startOffset = openNameTok.endOffset + 2;

  // Valid `attributesRange` exists
  if (exists(ctx.START_CLOSE) || exists(ctx.SLASH_CLOSE)) {
    const openBodyCloseTok = exists(ctx.START_CLOSE)
      ? ctx.START_CLOSE[0]
      : ctx.SLASH_CLOSE[0];
    // Ends one character before the closing '>' or '/>'.
    astNode.syntax.attributesRange = {
      startOffset,
      endOffset: openBodyCloseTok.startOffset - 1
    };
    return;
  }

  // Have to scan-ahead and guess where the attributes range ends
  const hasAttributes = isArray(ctx.attribute);
  const lastKnownAttribRangeTokenEnd = hasAttributes
    ? last(ctx.attribute).location.endOffset
    : openNameTok.endOffset;
  // NOTE(review): `findNextTextualToken` comes from @xml-tools/common; it is
  // presumably returning the first token located after the given offset, and
  // `null` when there is none - confirm against that package.
  const nextTextualToken = findNextTextualToken(
    tokenVector,
    lastKnownAttribRangeTokenEnd
  );
  if (nextTextualToken !== null) {
    astNode.syntax.guessedAttributesRange = {
      startOffset,
      endOffset: nextTextualToken.endOffset - 1
    };
  }
}
413+
355414
module.exports = {
356415
buildAst: buildAst
357416
};

packages/ast/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"license": "Apache-2.0",
1212
"typings": "./api.d.ts",
1313
"dependencies": {
14+
"@xml-tools/common": "^0.0.1",
1415
"@xml-tools/parser": "^0.4.0",
1516
"lodash": "4.17.15"
1617
},

packages/ast/scripts/update-snapshots.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ xmlSampleFiles.forEach(fileDesc => {
2121
const xmlInput = readFileSync(fileDesc.path, "utf8");
2222
const simpleNewLinesInput = xmlInput.replace(/\r\n/g, "\n");
2323
console.log(`Reading <${fileDesc.path}>`);
24-
const { cst } = parse(simpleNewLinesInput);
25-
const ast = buildAst(cst);
24+
const { cst, tokenVector } = parse(simpleNewLinesInput);
25+
const ast = buildAst(cst, tokenVector);
2626
modifyAstForAssertions(ast);
2727
const snapshotOutput = `module.exports = { ast : ${JSON.stringify(ast)}}`;
2828
const formattedSnapshotOutput = format(snapshotOutput, { parser: "babel" });

packages/ast/test/sample-test.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ function executeSampleTest(dirPath, assertNoErrors) {
1515
const inputPath = resolve(dirPath, "input.xml");
1616
const inputText = readFileSync(inputPath).toString("utf8");
1717
const simpleNewLinesInput = inputText.replace(/\r\n/g, "\n");
18-
const { cst, lexErrors, parseErrors } = parse(simpleNewLinesInput);
18+
const { cst, tokenVector, lexErrors, parseErrors } = parse(
19+
simpleNewLinesInput
20+
);
1921
if (assertNoErrors === true) {
2022
expect(lexErrors).to.be.empty;
2123
expect(parseErrors).to.be.empty;
2224
}
23-
const ast = buildAst(cst);
25+
const ast = buildAst(cst, tokenVector);
2426
assertParentPropsAreValid(ast);
2527
modifyAstForAssertions(ast);
2628
const expectedOutput = require(resolve(dirPath, "output.js")).ast;

packages/ast/test/snapshots/invalid/attributes-no-value-last/output.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ module.exports = {
4141
openName: { image: "note", startOffset: 1, endOffset: 4 },
4242
closeName: { image: "note", startOffset: 40, endOffset: 43 },
4343
openBody: { startOffset: 0, endOffset: 37 },
44-
closeBody: { startOffset: 38, endOffset: 44 }
44+
closeBody: { startOffset: 38, endOffset: 44 },
45+
attributesRange: { startOffset: 6, endOffset: 36 }
4546
}
4647
},
4748
position: { startOffset: 0, endOffset: 45 }

packages/ast/test/snapshots/invalid/attributes-no-value-middle/output.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ module.exports = {
4141
openName: { image: "note", startOffset: 1, endOffset: 4 },
4242
closeName: { image: "note", startOffset: 40, endOffset: 43 },
4343
openBody: { startOffset: 0, endOffset: 37 },
44-
closeBody: { startOffset: 38, endOffset: 44 }
44+
closeBody: { startOffset: 38, endOffset: 44 },
45+
attributesRange: { startOffset: 6, endOffset: 36 }
4546
}
4647
},
4748
position: { startOffset: 0, endOffset: 45 }

packages/ast/test/snapshots/invalid/attributes-no-value-no-eql-last/output.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ module.exports = {
4141
openName: { image: "note", startOffset: 1, endOffset: 4 },
4242
closeName: { image: "note", startOffset: 39, endOffset: 42 },
4343
openBody: { startOffset: 0, endOffset: 36 },
44-
closeBody: { startOffset: 37, endOffset: 43 }
44+
closeBody: { startOffset: 37, endOffset: 43 },
45+
attributesRange: { startOffset: 6, endOffset: 35 }
4546
}
4647
},
4748
position: { startOffset: 0, endOffset: 44 }

packages/ast/test/snapshots/invalid/attributes-no-value-no-eql-middle/output.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ module.exports = {
4141
openName: { image: "note", startOffset: 1, endOffset: 4 },
4242
closeName: { image: "note", startOffset: 39, endOffset: 42 },
4343
openBody: { startOffset: 0, endOffset: 36 },
44-
closeBody: { startOffset: 37, endOffset: 43 }
44+
closeBody: { startOffset: 37, endOffset: 43 },
45+
attributesRange: { startOffset: 6, endOffset: 35 }
4546
}
4647
},
4748
position: { startOffset: 0, endOffset: 44 }

packages/ast/test/snapshots/invalid/attributes-no-value-not-closed-nested/output.js

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ module.exports = {
3636
textContents: [],
3737
position: { startOffset: 10, endOffset: 35 },
3838
syntax: {
39-
openName: { image: "nested", startOffset: 11, endOffset: 16 }
39+
openName: { image: "nested", startOffset: 11, endOffset: 16 },
40+
guessedAttributesRange: { startOffset: 18, endOffset: 40 }
4041
}
4142
},
4243
{
@@ -57,7 +58,8 @@ module.exports = {
5758
openName: { image: "nest2", startOffset: 42, endOffset: 46 },
5859
closeName: { image: "nest2", startOffset: 61, endOffset: 65 },
5960
openBody: { startOffset: 41, endOffset: 47 },
60-
closeBody: { startOffset: 59, endOffset: 66 }
61+
closeBody: { startOffset: 59, endOffset: 66 },
62+
attributesRange: { startOffset: 48, endOffset: 46 }
6163
}
6264
},
6365
{
@@ -89,7 +91,8 @@ module.exports = {
8991
openName: { image: "nested3", startOffset: 73, endOffset: 79 },
9092
closeName: { image: "nested3", startOffset: 94, endOffset: 100 },
9193
openBody: { startOffset: 72, endOffset: 90 },
92-
closeBody: { startOffset: 92, endOffset: 101 }
94+
closeBody: { startOffset: 92, endOffset: 101 },
95+
attributesRange: { startOffset: 81, endOffset: 89 }
9396
}
9497
}
9598
],
@@ -115,7 +118,8 @@ module.exports = {
115118
openName: { image: "top", startOffset: 1, endOffset: 3 },
116119
closeName: { image: "top", startOffset: 105, endOffset: 107 },
117120
openBody: { startOffset: 0, endOffset: 4 },
118-
closeBody: { startOffset: 103, endOffset: 108 }
121+
closeBody: { startOffset: 103, endOffset: 108 },
122+
attributesRange: { startOffset: 5, endOffset: 3 }
119123
}
120124
},
121125
position: { startOffset: 0, endOffset: 109 }

0 commit comments

Comments
 (0)