-
-
Notifications
You must be signed in to change notification settings - Fork 201
/
htmlxparser.ts
122 lines (108 loc) · 4.66 KB
/
htmlxparser.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import parse5, {
DefaultTreeDocumentFragment,
DefaultTreeElement,
DefaultTreeTextNode,
} from 'parse5';
import compiler from 'svelte/compiler';
import { Node } from 'estree-walker';
/**
 * Visits `doc` and then every descendant reachable through `childNodes`,
 * depth-first and in document order (a node is visited before its children).
 *
 * @param doc    Root of the (sub)tree to traverse.
 * @param action Callback invoked once for each visited node.
 */
function walkAst(doc: DefaultTreeElement, action: (c: DefaultTreeElement) => void) {
    action(doc);

    const children = doc.childNodes;
    if (!children) {
        // Nodes without a `childNodes` list (e.g. text nodes) are leaves.
        return;
    }

    for (const child of children) {
        walkAst(child as DefaultTreeElement, action);
    }
}
/**
 * Finds the `<script>` and `<style>` elements of an HTMLx document and
 * describes each one as an AST-like node.
 *
 * The document is parsed with parse5 (with source locations enabled) and the
 * resulting tree is walked. Every matching element yields a node with:
 *  - `type`: the tag name with its first letter upper-cased (`Script`/`Style`),
 *  - `start` / `end`: absolute offsets of the whole element in `htmlx`,
 *  - `attributes`: one `Attribute` node per attribute; `value` is `true` for
 *    an attribute written without `=`, otherwise a single `Text` node,
 *  - `content`: a `Text` node covering the element's first child, or a
 *    zero-width `Text` node between the tags when the element is empty.
 *
 * All positions are absolute character offsets into `htmlx`.
 *
 * @param htmlx The HTMLx source text.
 * @returns The verbatim element nodes, in the order the tree walk meets them.
 */
export function findVerbatimElements(htmlx: string) {
    const elements: Node[] = [];
    const tagNames = ['script', 'style'];

    const parseOpts = { sourceCodeLocationInfo: true };
    const doc = parse5.parseFragment(htmlx, parseOpts) as DefaultTreeDocumentFragment;

    // parse5 follows the HTML spec and reports tag names lower-cased, so a
    // `<Script>` or `<Style>` in the source also shows up as `script`/`style`.
    // Only accept the element when the name as written in the source, directly
    // after `<`, is exactly the lower-case tag name. Looking only at that spot
    // avoids false positives from attributes, content or the closing tag.
    const checkCase = (el: parse5.DefaultTreeElement) => {
        const tagStart = el.sourceCodeLocation.startOffset || 0;
        return htmlx.startsWith(el.nodeName, tagStart + 1);
    };

    // Builds the attribute node for `attr` of `el` from its source location.
    const parseValue = (el: parse5.DefaultTreeElement, attr: parse5.Attribute) => {
        const { startOffset, endOffset } = el.sourceCodeLocation.attrs[attr.name];
        // Search for `=` inside the attribute's own source span only.
        const equalsInAttr = htmlx.substring(startOffset, endOffset).indexOf('=');
        const isBare = equalsInAttr === -1;

        return {
            type: 'Attribute',
            name: attr.name,
            value: isBare || [
                {
                    type: 'Text',
                    // Everything after the `=`, including any quotes.
                    start: startOffset + equalsInAttr + 1,
                    end: endOffset,
                    raw: attr.value,
                },
            ],
            start: startOffset,
            end: endOffset,
        };
    };

    walkAst(doc as DefaultTreeElement, (el) => {
        if (!tagNames.includes(el.nodeName)) {
            return;
        }

        const location = el.sourceCodeLocation;
        // Without a source location there is nothing to map back to `htmlx`.
        if (!location || !checkCase(el)) {
            return;
        }

        const hasNodes = el.childNodes && el.childNodes.length > 0;
        const content = hasNodes ? (el.childNodes[0] as DefaultTreeTextNode) : null;

        let contentNode;
        if (content === null) {
            // Empty element: the content is the zero-width span between the
            // tags. Use absolute offsets (not line-relative columns) so the
            // positions agree with every other position in the node. An
            // unclosed element has no end tag; fall back to the element end.
            contentNode = {
                type: 'Text',
                start: location.startTag.endOffset,
                end: location.endTag ? location.endTag.startOffset : location.endOffset,
                value: '',
                raw: '',
            };
        } else {
            contentNode = {
                type: 'Text',
                start: content.sourceCodeLocation.startOffset,
                end: content.sourceCodeLocation.endOffset,
                value: content.value,
                raw: content.value,
            };
        }

        elements.push({
            start: location.startOffset,
            end: location.endOffset,
            type: el.nodeName[0].toUpperCase() + el.nodeName.slice(1),
            attributes: !el.attrs ? [] : el.attrs.map((a) => parseValue(el, a)),
            content: contentNode,
        });
    });

    return elements;
}
/**
 * Returns a copy of `htmlx` in which the content of every given verbatim
 * element is overwritten with spaces.
 *
 * Newlines inside the content are kept and every other character becomes a
 * single space, so the result has the same length and the same line breaks
 * as the input. Elements without a `content` property are skipped.
 *
 * @param htmlx            The HTMLx source text.
 * @param verbatimElements Nodes whose `content.start` / `content.end` give the
 *                         range to blank out.
 * @returns The source text with the given ranges blanked.
 */
export function blankVerbatimContent(htmlx: string, verbatimElements: Node[]) {
    let blanked = htmlx;

    for (const element of verbatimElements) {
        const body = element.content;
        if (!body) {
            continue;
        }

        const { start, end } = body;
        const before = blanked.substring(0, start);
        const hidden = blanked.substring(start, end).replace(/[^\n]/g, ' ');
        const after = blanked.substring(end);
        blanked = before + hidden + after;
    }

    return blanked;
}
/**
 * Parses an HTMLx document into a Svelte `html` AST in which `<script>` and
 * `<style>` elements appear as verbatim nodes.
 *
 * The verbatim elements are located first, their content is blanked out, the
 * blanked text is handed to the Svelte parser, and the verbatim nodes are then
 * appended to the resulting fragment's `children`. The fragment's `start` and
 * `end` are widened so that they cover every appended node.
 *
 * @param htmlx The HTMLx source text.
 * @returns The `html` fragment of the Svelte parse result, with the verbatim
 *          element nodes appended to its children.
 */
export function parseHtmlx(htmlx: string): Node {
    // Svelte tries to parse the content of style and script tags, which does not
    // play well with TypeScript, so that content is blanked out before parsing.
    // The HTMLx spec says these tags should be retained as-is after processing,
    // so this is fine.
    const verbatimElements = findVerbatimElements(htmlx);
    const deconstructed = blankVerbatimContent(htmlx, verbatimElements);

    // Extract the html content parsed as HTMLx; this excludes the script and
    // style tags.
    const svelteHtmlxAst = compiler.parse(deconstructed).html;

    // Restore the script and style tags as nodes to maintain validity with HTMLx.
    for (const s of verbatimElements) {
        svelteHtmlxAst.children.push(s);
        // NOTE(review): the Svelte parser appears to report `start`/`end` as
        // null when the template has no markup nodes - confirm. `Math.min`
        // would coerce null to 0 and pin the start to 0, so a missing bound
        // is taken directly from the verbatim node instead.
        svelteHtmlxAst.start =
            svelteHtmlxAst.start == null ? s.start : Math.min(svelteHtmlxAst.start, s.start);
        svelteHtmlxAst.end =
            svelteHtmlxAst.end == null ? s.end : Math.max(svelteHtmlxAst.end, s.end);
    }

    return svelteHtmlxAst;
}