Skip to content

Commit

Permalink
feat(ast): attributesRange and guessedAttributeRange support
Browse files Browse the repository at this point in the history
Details in the api.d.ts changed in this commit

BREAKING CHANGE: buildAst now requires a tokenVector argument

fixes #51
  • Loading branch information
bd82 committed Dec 8, 2019
1 parent 61cdcb9 commit 8ce840d
Show file tree
Hide file tree
Showing 32 changed files with 252 additions and 130 deletions.
4 changes: 2 additions & 2 deletions packages/ast/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ const xmlText = `<note>
</note>
`;

const { cst } = parse(xmlText);
const xmlDocAst = buildAst(cst);
const { cst, tokenVector } = parse(xmlText);
const xmlDocAst = buildAst(cst, tokenVector);
console.log(xmlDocAst.rootElement.name); // -> note

// A Visitor allows us to invoke actions on the XML ASTNodes without worrying about
Expand Down
30 changes: 27 additions & 3 deletions packages/ast/api.d.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import { IToken } from "chevrotain";
import { DocumentCstNode } from "@xml-tools/parser";

/**
* Builds an XML Ast from an XML Cst.
* Can process even partial CSTs...
*
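* @param docCst - The document CST to convert.
* @param tokenVector - The token vector returned by the same `parse` call that produced `docCst`.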
*/
export function buildAst(docCst: DocumentCstNode): XMLDocument;
export function buildAst(
docCst: DocumentCstNode,
tokenVector: IToken[]
): XMLDocument;

/**
* An Abstract Syntax Tree structure
Expand Down Expand Up @@ -85,6 +87,23 @@ declare interface XMLElement {
// Will not exist if any of the closing "brackets" are missing
// - e.g in a self closing element.
readonly closeBody?: XMLToken;

// Describes the range of the attributes section:
// This starts one character **after** the opening name token
// and ends one character before the open body's closing '>' or self-closing '/>'.
// Examples:
// <SomeTag attribute1="value1" attrib2="666" >Some content</SomeTag>
// <===== attributesRange =====>
// <SomeTag attribute1="value1" attrib2="666" />
// <===== attributesRange =====>
readonly attributesRange?: SourceRange;

// Same as attributesRange, except that this property is used for partially valid
// XMLElements, where the AstBuilder cannot be certain of the exact attributes range.
// - At most one of `attributesRange` / `guessedAttributesRange` may exist at the same time.
// - In some cases the attributes range cannot even be guessed.
// In that scenario neither of the two properties will exist.
readonly guessedAttributesRange?: SourceRange;
};
readonly position: SourcePosition;
}
Expand Down Expand Up @@ -114,6 +133,11 @@ declare interface XMLAttribute {
readonly position: SourcePosition;
}

// A pair of offsets delimiting a span of the source text.
// Used as the type of the `attributesRange` and `guessedAttributesRange`
// syntax properties of an XMLElement.
declare interface SourceRange {
// Offset at which the span begins.
readonly startOffset: number;
// Offset at which the span ends.
readonly endOffset: number;
}

declare interface SourcePosition {
readonly startOffset: number;
readonly endOffset: number;
Expand Down
151 changes: 105 additions & 46 deletions packages/ast/lib/build-ast.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const { BaseXmlCstVisitor } = require("@xml-tools/parser");
const {
last,
forEach,
reduce,
map,
Expand All @@ -8,14 +9,16 @@ const {
isEmpty,
isArray
} = require("lodash");

const { findNextTextualToken } = require("@xml-tools/common");
const { getAstChildrenReflective } = require("./utils");

/**
* @param {DocumentCstNode} docCst
* @param {IToken[]} tokenVector
* @returns {XMLDocument}
*/
function buildAst(docCst) {
function buildAst(docCst, tokenVector) {
AstBuilder.setState({ tokenVector });
const xmlDocAst = AstBuilder.visit(docCst);

if (xmlDocAst.rootElement !== invalidSyntax) {
Expand All @@ -29,6 +32,10 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
super();
}

/**
 * Stores the token vector on this visitor instance so that rule methods
 * can read it later through `this.tokenVector`.
 *
 * @param {{ tokenVector: IToken[] }} state
 */
setState({ tokenVector }) {
this.tokenVector = tokenVector;
}

/**
 * Visits a CST node by delegating to the base visitor's `visit`,
 * forwarding the node's own `location` as the second argument.
 *
 * @param cstNode - The CST node to visit; its `location` property is read.
 */
visit(cstNode) {
return super.visit(cstNode, cstNode.location);
}
Expand Down Expand Up @@ -104,7 +111,7 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
}

/**
* @param ctx {ElementCstNode}
* @param ctx {ElementCtx}
* @param location {SourcePosition}
*/
element(ctx, location) {
Expand All @@ -129,49 +136,10 @@ class CstToAstVisitor extends BaseXmlCstVisitor {
astNode.textContents = textContents;
}

if (ctx.Name !== undefined && ctx.Name[0].isInsertedInRecovery !== true) {
const openNameToken = ctx.Name[0];
astNode.syntax.openName = toXMLToken(openNameToken);
const nsParts = nsToParts(openNameToken.image);
if (nsParts !== null) {
astNode.ns = nsParts.ns;
astNode.name = nsParts.name;
} else {
astNode.name = openNameToken.image;
}
}
handleElementOpenCloseNameRanges(astNode, ctx);
handleElementOpenCloseBodyRanges(astNode, ctx);
handleElementAttributeRanges(astNode, ctx, this.tokenVector);

if (
ctx.END_NAME !== undefined &&
ctx.END_NAME[0].isInsertedInRecovery !== true
) {
astNode.syntax.closeName = toXMLToken(ctx.END_NAME[0]);
}

/* istanbul ignore else - Defensive Coding */
if (exists(ctx.OPEN)) {
let openBodyCloseTok = undefined;
/* istanbul ignore else - Defensive Coding */
if (exists(ctx.START_CLOSE)) {
openBodyCloseTok = ctx.START_CLOSE[0];
} else if (exists(ctx.SLASH_CLOSE)) {
openBodyCloseTok = ctx.SLASH_CLOSE[0];
}

if (openBodyCloseTok !== undefined) {
astNode.syntax.openBody = {
...startOfXMLToken(ctx.OPEN[0]),
...endOfXMLToken(openBodyCloseTok)
};
}
}

if (exists(ctx.SLASH_OPEN) && exists(ctx.END)) {
astNode.syntax.closeBody = {
...startOfXMLToken(ctx.SLASH_OPEN[0]),
...endOfXMLToken(ctx.END[0])
};
}
setChildrenParent(astNode);

return astNode;
Expand Down Expand Up @@ -299,7 +267,6 @@ function updateNamespaces(element, prevNamespaces = []) {
}

/**
*
* @param {chevrotain.IToken} token
*/
function toXMLToken(token) {
Expand Down Expand Up @@ -352,6 +319,98 @@ function nsToParts(text) {
*/
const invalidSyntax = null;

/**
 * Copies the opening and closing tag-name information of an element from its
 * CST context onto the AST node (the node is mutated in place).
 *
 * - Opening name: sets `syntax.openName`, and `name` (plus `ns` when
 *   `nsToParts` can split the token image).
 * - Closing name: sets `syntax.closeName`.
 *
 * A name token flagged with `isInsertedInRecovery === true` is ignored.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 */
function handleElementOpenCloseNameRanges(astNode, ctx) {
  const openNames = ctx.Name;
  const hasRealOpenName =
    openNames !== undefined && openNames[0].isInsertedInRecovery !== true;

  if (hasRealOpenName) {
    const nameTok = openNames[0];
    astNode.syntax.openName = toXMLToken(nameTok);

    const parts = nsToParts(nameTok.image);
    if (parts === null) {
      // The image could not be split: the whole image is the element name.
      astNode.name = nameTok.image;
    } else {
      astNode.ns = parts.ns;
      astNode.name = parts.name;
    }
  }

  const closeNames = ctx.END_NAME;
  const hasRealCloseName =
    closeNames !== undefined && closeNames[0].isInsertedInRecovery !== true;

  if (hasRealCloseName) {
    astNode.syntax.closeName = toXMLToken(closeNames[0]);
  }
}

/**
 * Records the open/close "body" ranges of an element on `astNode.syntax`
 * (the node is mutated in place).
 *
 * - `openBody` is set when OPEN exists together with either START_CLOSE or
 *   SLASH_CLOSE. It merges `startOfXMLToken(OPEN)` with
 *   `endOfXMLToken(<closing token>)`.
 * - `closeBody` is set when both SLASH_OPEN and END exist. This check is
 *   nested inside the OPEN check, so it is skipped when OPEN does not exist.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 */
function handleElementOpenCloseBodyRanges(astNode, ctx) {
/* istanbul ignore else - Defensive Coding */
if (exists(ctx.OPEN)) {
// The token that terminates the opening tag: START_CLOSE is preferred,
// SLASH_CLOSE is the fallback; it stays undefined when neither exists.
let openBodyCloseTok = undefined;
/* istanbul ignore else - Defensive Coding */
if (exists(ctx.START_CLOSE)) {
openBodyCloseTok = ctx.START_CLOSE[0];
} else if (exists(ctx.SLASH_CLOSE)) {
openBodyCloseTok = ctx.SLASH_CLOSE[0];
}

// Only emit `openBody` when the opening tag was actually terminated.
if (openBodyCloseTok !== undefined) {
astNode.syntax.openBody = {
...startOfXMLToken(ctx.OPEN[0]),
...endOfXMLToken(openBodyCloseTok)
};
}

// The closing tag range requires both of its delimiting tokens.
if (exists(ctx.SLASH_OPEN) && exists(ctx.END)) {
astNode.syntax.closeBody = {
...startOfXMLToken(ctx.SLASH_OPEN[0]),
...endOfXMLToken(ctx.END[0])
};
}
}
}

/**
 * Computes the range of an element's attributes section and stores it on
 * `astNode.syntax` (the node is mutated in place).
 *
 * - When the opening tag is terminated (START_CLOSE or SLASH_CLOSE exists),
 *   the exact range is stored as `attributesRange`.
 * - Otherwise the end is guessed from the next textual token found after the
 *   last attribute (or after the name when there are no attributes) and the
 *   result is stored as `guessedAttributesRange`.
 * - When there is no Name, or no next textual token is found, nothing is set.
 *
 * The range starts two past the name token's `endOffset`. When nothing lies
 * between the name and the terminating bracket, the computed `endOffset`
 * is smaller than `startOffset`.
 *
 * @param {XMLElement} astNode
 * @param {ElementCtx} ctx
 * @param {IToken[]} tokenVector
 */
function handleElementAttributeRanges(astNode, ctx, tokenVector) {
  if (!exists(ctx.Name)) {
    return;
  }

  const nameTok = ctx.Name[0];
  const startOffset = nameTok.endOffset + 2;

  const endsWithStartClose = exists(ctx.START_CLOSE);
  const endsWithSlashClose = !endsWithStartClose && exists(ctx.SLASH_CLOSE);

  // Exact case: the opening tag has a terminating bracket.
  if (endsWithStartClose || endsWithSlashClose) {
    const terminator = endsWithStartClose
      ? ctx.START_CLOSE[0]
      : ctx.SLASH_CLOSE[0];
    const endOffset = terminator.startOffset - 1;
    astNode.syntax.attributesRange = { startOffset, endOffset };
    return;
  }

  // Guess case: scan ahead from the last token known to belong to the
  // opening tag and use the next textual token as the boundary.
  const scanFromOffset = isArray(ctx.attribute)
    ? last(ctx.attribute).location.endOffset
    : nameTok.endOffset;
  const followingToken = findNextTextualToken(tokenVector, scanFromOffset);

  if (followingToken === null) {
    return;
  }

  astNode.syntax.guessedAttributesRange = {
    startOffset,
    endOffset: followingToken.endOffset - 1
  };
}

module.exports = {
buildAst: buildAst
};
1 change: 1 addition & 0 deletions packages/ast/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"license": "Apache-2.0",
"typings": "./api.d.ts",
"dependencies": {
"@xml-tools/common": "^0.0.1",
"@xml-tools/parser": "^0.4.0",
"lodash": "4.17.15"
},
Expand Down
4 changes: 2 additions & 2 deletions packages/ast/scripts/update-snapshots.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ xmlSampleFiles.forEach(fileDesc => {
const xmlInput = readFileSync(fileDesc.path, "utf8");
const simpleNewLinesInput = xmlInput.replace(/\r\n/g, "\n");
console.log(`Reading <${fileDesc.path}>`);
const { cst } = parse(simpleNewLinesInput);
const ast = buildAst(cst);
const { cst, tokenVector } = parse(simpleNewLinesInput);
const ast = buildAst(cst, tokenVector);
modifyAstForAssertions(ast);
const snapshotOutput = `module.exports = { ast : ${JSON.stringify(ast)}}`;
const formattedSnapshotOutput = format(snapshotOutput, { parser: "babel" });
Expand Down
6 changes: 4 additions & 2 deletions packages/ast/test/sample-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ function executeSampleTest(dirPath, assertNoErrors) {
const inputPath = resolve(dirPath, "input.xml");
const inputText = readFileSync(inputPath).toString("utf8");
const simpleNewLinesInput = inputText.replace(/\r\n/g, "\n");
const { cst, lexErrors, parseErrors } = parse(simpleNewLinesInput);
const { cst, tokenVector, lexErrors, parseErrors } = parse(
simpleNewLinesInput
);
if (assertNoErrors === true) {
expect(lexErrors).to.be.empty;
expect(parseErrors).to.be.empty;
}
const ast = buildAst(cst);
const ast = buildAst(cst, tokenVector);
assertParentPropsAreValid(ast);
modifyAstForAssertions(ast);
const expectedOutput = require(resolve(dirPath, "output.js")).ast;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ module.exports = {
openName: { image: "note", startOffset: 1, endOffset: 4 },
closeName: { image: "note", startOffset: 40, endOffset: 43 },
openBody: { startOffset: 0, endOffset: 37 },
closeBody: { startOffset: 38, endOffset: 44 }
closeBody: { startOffset: 38, endOffset: 44 },
attributesRange: { startOffset: 6, endOffset: 36 }
}
},
position: { startOffset: 0, endOffset: 45 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ module.exports = {
openName: { image: "note", startOffset: 1, endOffset: 4 },
closeName: { image: "note", startOffset: 40, endOffset: 43 },
openBody: { startOffset: 0, endOffset: 37 },
closeBody: { startOffset: 38, endOffset: 44 }
closeBody: { startOffset: 38, endOffset: 44 },
attributesRange: { startOffset: 6, endOffset: 36 }
}
},
position: { startOffset: 0, endOffset: 45 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ module.exports = {
openName: { image: "note", startOffset: 1, endOffset: 4 },
closeName: { image: "note", startOffset: 39, endOffset: 42 },
openBody: { startOffset: 0, endOffset: 36 },
closeBody: { startOffset: 37, endOffset: 43 }
closeBody: { startOffset: 37, endOffset: 43 },
attributesRange: { startOffset: 6, endOffset: 35 }
}
},
position: { startOffset: 0, endOffset: 44 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ module.exports = {
openName: { image: "note", startOffset: 1, endOffset: 4 },
closeName: { image: "note", startOffset: 39, endOffset: 42 },
openBody: { startOffset: 0, endOffset: 36 },
closeBody: { startOffset: 37, endOffset: 43 }
closeBody: { startOffset: 37, endOffset: 43 },
attributesRange: { startOffset: 6, endOffset: 35 }
}
},
position: { startOffset: 0, endOffset: 44 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ module.exports = {
textContents: [],
position: { startOffset: 10, endOffset: 35 },
syntax: {
openName: { image: "nested", startOffset: 11, endOffset: 16 }
openName: { image: "nested", startOffset: 11, endOffset: 16 },
guessedAttributesRange: { startOffset: 18, endOffset: 40 }
}
},
{
Expand All @@ -57,7 +58,8 @@ module.exports = {
openName: { image: "nest2", startOffset: 42, endOffset: 46 },
closeName: { image: "nest2", startOffset: 61, endOffset: 65 },
openBody: { startOffset: 41, endOffset: 47 },
closeBody: { startOffset: 59, endOffset: 66 }
closeBody: { startOffset: 59, endOffset: 66 },
attributesRange: { startOffset: 48, endOffset: 46 }
}
},
{
Expand Down Expand Up @@ -89,7 +91,8 @@ module.exports = {
openName: { image: "nested3", startOffset: 73, endOffset: 79 },
closeName: { image: "nested3", startOffset: 94, endOffset: 100 },
openBody: { startOffset: 72, endOffset: 90 },
closeBody: { startOffset: 92, endOffset: 101 }
closeBody: { startOffset: 92, endOffset: 101 },
attributesRange: { startOffset: 81, endOffset: 89 }
}
}
],
Expand All @@ -115,7 +118,8 @@ module.exports = {
openName: { image: "top", startOffset: 1, endOffset: 3 },
closeName: { image: "top", startOffset: 105, endOffset: 107 },
openBody: { startOffset: 0, endOffset: 4 },
closeBody: { startOffset: 103, endOffset: 108 }
closeBody: { startOffset: 103, endOffset: 108 },
attributesRange: { startOffset: 5, endOffset: 3 }
}
},
position: { startOffset: 0, endOffset: 109 }
Expand Down
Loading

0 comments on commit 8ce840d

Please sign in to comment.