diff --git a/core/src/main/java/com/salesforce/slds/shared/RegexPattern.java b/core/src/main/java/com/salesforce/slds/shared/RegexPattern.java index 343637f..caeeec4 100644 --- a/core/src/main/java/com/salesforce/slds/shared/RegexPattern.java +++ b/core/src/main/java/com/salesforce/slds/shared/RegexPattern.java @@ -41,11 +41,6 @@ public class RegexPattern { public static final String NUMERIC_PATTERN = "[-|+]?" + NUMBER_FRAGMENT + "\\s*[a-zA-Z]+" + "|" + PERCENT_PATTERN + "|" + "[-|+]?" + NUMBER_FRAGMENT; - public static final String START_TAG_PATTERN = - "(?<(?!--)|<(?[\\w-]+(:?[\\w-]+)?)|(?\\{|[\\\\]?['\"]+))"; - public static final String END_TAG_PATTERN = - "(?/?>|(?--\\s*>)|[\\w-]+(:?[\\w-]+)?)\\s*>|(?\\}))"; - public static final String AURA_TOKEN_FUNCTION = "t(?:oken)?\\((?[\\w\\d]+)\\)"; public static final String VAR_FUNCTION = "var\\(\\s*--lwc-(?[\\w\\d-]+)\\s*(,\\s*(?"+ COLOR_PATTERN + "|" + NUMERIC_PATTERN + "|" + WORD_FRAGMENT + ")\\s*)?\\)"; diff --git a/core/src/main/java/com/salesforce/slds/shared/parsers/markup/MarkupParser.java b/core/src/main/java/com/salesforce/slds/shared/parsers/markup/MarkupParser.java index 060f250..cb47d4b 100644 --- a/core/src/main/java/com/salesforce/slds/shared/parsers/markup/MarkupParser.java +++ b/core/src/main/java/com/salesforce/slds/shared/parsers/markup/MarkupParser.java @@ -6,285 +6,43 @@ */ package com.salesforce.slds.shared.parsers.markup; -import com.google.common.collect.ImmutableSet; -import com.salesforce.slds.shared.RegexPattern; import com.salesforce.slds.shared.models.core.HTMLElement; -import com.salesforce.slds.shared.models.locations.Location; -import com.salesforce.slds.shared.models.locations.Range; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.parser.LightningTreeBuilder; import org.jsoup.parser.Parser; import org.springframework.util.StringUtils; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; public class MarkupParser { - public static List parse(String path, List lines) { - return new MarkupProcessor(path, lines).parse(); - } - - private static class MarkupProcessor { - final List htmlElements = new ArrayList<>(); - - final String path; - final List lines; - - MarkupProcessor(String path, List lines) { - this.path = path; - this.lines = lines; - } - - - List createTagInformation() { - final Stack commitBlocks = new Stack<>(); - final Stack quotes = new Stack<>(); - - List result = new ArrayList<>(); - Stack processing = new Stack<>(); - - for (int index = 0; index < lines.size() ; index ++) { - String line = lines.get(index); - - Matcher matcher = TAG.matcher(line); - - while(matcher.find()) { - boolean isComment = matcher.group("startComment") != null || - matcher.group("endComment") != null; - - boolean isStartTag = matcher.group("start") != null; - - Location start = new Location(index, matcher.start()); - Location end = new Location(index, matcher.end()); - String tag = matcher.group(isStartTag ? "startTag" : "endTag"); - - - if (processingComment(isComment, tag, isStartTag, commitBlocks)) { - continue; - } - - String skipStart = matcher.group("startSkip"); - String skipEnd = matcher.group("endSkip"); - - if (skipStart != null || skipEnd != null) { - String content = skipStart != null ? skipStart : skipEnd; - - boolean withinTag = processing.isEmpty() == false; - - if (content.startsWith("\\") == false && withinTag ) { - for (char character : content.toCharArray()) { - if (QUOTES.contains(character)) { - String top = quotes.empty() ? "" : quotes.peek(); - String inQuestion = String.valueOf(character); - - boolean popQuote = (top.contentEquals("{") && inQuestion.contentEquals("}")) - || (inQuestion.equalsIgnoreCase("{") == false && inQuestion.equalsIgnoreCase(top)); - - - if (quotes.empty() == false && popQuote) { - quotes.pop(); - } else { - if (quotes.isEmpty() && inQuestion.equalsIgnoreCase("}") == false) { - quotes.push(inQuestion); - } - } - } - } - - } - - continue; - } - - if (quotes.isEmpty() == false) { continue;} - - if (isStartTag) { - processing.push(new TagInformation(start, end, tag, TagInformation.TagType.INCOMPLETE)); - } else { - if (tag != null) { - TagInformation tagInformation = - new TagInformation(start, end, tag, TagInformation.TagType.CLOSE); - - result.add(tagInformation); - - } else if (processing.empty() == false) { - TagInformation startTag = processing.pop(); - - TagInformation.TagType type = matcher.group("end").length() == 1 ? - TagInformation.TagType.OPEN : TagInformation.TagType.SELF_CLOSING; - - TagInformation tagInformation = - new TagInformation(startTag.start, end, startTag.tag, type); - - result.add(tagInformation); - } - } - } - } - - return result; - } - - - List parse() { - List tags = createTagInformation(); - - final Stack internal = new Stack<>(); - final Stack names = new Stack<>(); - - for (int index = 0 ; index < tags.size() ; index ++) { - TagInformation tag = tags.get(index); - - switch(tag.type) { - case INCOMPLETE: - continue; - case OPEN: - internal.push(tag); - names.push(tag.tag); - break; - case SELF_CLOSING: - process(tag, internal); - break; - case CLOSE: - break; - } - - if (tag.type == TagInformation.TagType.CLOSE) { - TagInformation open = internal.empty()? null : internal.pop(); - String name = names.empty() ? null : names.pop(); - if (open == null) { - process(tag, null); - continue; - } - - if ((open.tag.equalsIgnoreCase(tag.tag) == false && names.contains(tag.tag) == false)) { - process(tag, internal); - - internal.push(open); - names.push(name); - continue; - } - - if (open.tag.equalsIgnoreCase(tag.tag) == false && names.contains(tag.tag)) { - process(open, internal); - index --; - continue; - } - - if (open.tag.equalsIgnoreCase(tag.tag)) { - TagInformation newTag = new TagInformation(open.start, tag.end, open.tag, - TagInformation.TagType.CLOSE); - newTag.children.addAll(open.children); - - process(newTag, internal); - } - } - - } - - if (internal.empty() == false) { - process(internal.pop(), null); - } - - htmlElements.sort(HTMLElement::compareTo); - - return htmlElements; - } - - private void process(TagInformation tag, Stack stack) { - List raw = extract(lines, tag.start, tag.end); - - HTMLElement htmlElement = HTMLElement.builder() - .raw(raw) - .element(createElement(raw, tag, path)) - .range(new Range(tag.start, tag.end)) - .build(); - - htmlElements.add(htmlElement); - - if (stack != null && stack.size() > 0) { - stack.peek().children.add(htmlElement); - } - } - - private boolean processingComment(boolean isComment, String tag, boolean isStartTag, Stack stack) { - // add Tag to support - - boolean process = isComment || (tag != null && tag.equalsIgnoreCase("script")); - - if (process) { - String top = stack.empty() ? "" : stack.peek(); - String identifier = tag == null ? "" : tag; - - if (isStartTag) { - stack.push(identifier); - } else { - if (top.equals(identifier)) { - stack.pop(); - } - } - - return true; - } - - return stack.empty() == false; - } + public static List parse(String path, List lines) { + String html = StringUtils.collectionToDelimitedString(lines, System.lineSeparator()); + Document document = Jsoup.parse(html, path, new Parser(new LightningTreeBuilder(lines))); - private List extract(List lines, Location start, Location end) { - List raw = new ArrayList<>(); + MarkupVisitor visitor = new MarkupVisitor(); + NodeTraversor.traverse(visitor, document.children()); - for (int index = start.getLine() ; - index <= end.getLine() ; - index++ - ) { - String line = lines.get(index); - int startIndex = start.getLine() == index ? - start.getColumn() : 0; + visitor.htmlElements.sort(HTMLElement::compareTo); + return visitor.htmlElements; + } - int endIndex = end.getLine() == index ? - end.getColumn() : line.length(); + private static class MarkupVisitor implements NodeVisitor { + final List htmlElements = new ArrayList<>(); - raw.add(line.substring(startIndex, endIndex)); + @Override + public void head(Node node, int depth) { + if (node instanceof LightningTreeBuilder.ElementWithPosition) { + htmlElements.add(((LightningTreeBuilder.ElementWithPosition) node).toHTMLElement()); } - - return raw; } - private Element createElement(List lines, TagInformation tagInformation, String baseUri) { - String html = StringUtils.collectionToDelimitedString(lines, System.lineSeparator()); - if (html.startsWith(" children = tagInformation.children; - - //handle self closing tag within custom element - if (result.tagName().equalsIgnoreCase("head") == false && result.children().size() != children.size()) { - Document htmlVersion = Jsoup.parse(html, baseUri, Parser.htmlParser()); - Element firstChild = htmlVersion.body().child(0); - - result.replaceWith(firstChild); - result = document.child(0); - } - - if (children.size() == result.children().size()) { - for (int index = 0; index < children.size(); index++) { - Element childElement = result.child(index); - childElement.replaceWith(children.get(index).getContent()); - } - } - - return result; - } + @Override + public void tail(Node node, int depth) { } } - - static final Set QUOTES = ImmutableSet.of('{', '}', '\'', '"'); - static final Pattern TAG = Pattern.compile(RegexPattern.START_TAG_PATTERN + "|" + RegexPattern.END_TAG_PATTERN); } diff --git a/core/src/main/java/com/salesforce/slds/shared/parsers/markup/TagInformation.java b/core/src/main/java/com/salesforce/slds/shared/parsers/markup/TagInformation.java deleted file mode 100644 index 7e49915..0000000 --- a/core/src/main/java/com/salesforce/slds/shared/parsers/markup/TagInformation.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018, salesforce.com, inc. - * All rights reserved. - * SPDX-License-Identifier: BSD-3-Clause - * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause - */ -package com.salesforce.slds.shared.parsers.markup; - -import com.salesforce.slds.shared.models.core.HTMLElement; -import com.salesforce.slds.shared.models.locations.Location; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; - -import java.util.ArrayList; -import java.util.List; - -class TagInformation implements Comparable { - - enum TagType {INCOMPLETE, OPEN, SELF_CLOSING, CLOSE} - - final Location start; - final Location end; - final String tag; - final TagType type; - final List children = new ArrayList<>(); - - TagInformation(Location start, Location end, String tag, TagType type) { - this.start = start; - this.end = end; - this.tag = tag; - this.type = type; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - - if (o == null || getClass() != o.getClass()) return false; - - TagInformation that = (TagInformation) o; - - return new EqualsBuilder() - .append(start, that.start) - .append(end, that.end) - .append(tag, that.tag) - .append(type, that.type) - .isEquals(); - } - - @Override - public int hashCode() { - return new HashCodeBuilder(17, 37) - .append(start) - .append(end) - .append(type) - .append(tag) - .toHashCode(); - } - - @Override - public int compareTo(TagInformation o) { - int compare = start.getLine() - o.start.getLine(); - - if (compare != 0) { - return compare; - } - - compare = start.getColumn() - o.start.getColumn(); - if (compare != 0){ - return compare; - } - - return 0; - } -} diff --git a/core/src/main/java/org/jsoup/parser/LightningTreeBuilder.java b/core/src/main/java/org/jsoup/parser/LightningTreeBuilder.java new file mode 100644 index 0000000..5ded239 --- /dev/null +++ b/core/src/main/java/org/jsoup/parser/LightningTreeBuilder.java @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2018, salesforce.com, inc. + * All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +package org.jsoup.parser; + +import com.google.common.collect.Lists; +import com.salesforce.slds.shared.models.core.HTMLElement; +import com.salesforce.slds.shared.models.locations.Location; +import com.salesforce.slds.shared.models.locations.Range; +import org.jsoup.nodes.Attributes; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.springframework.util.StringUtils; + +import java.util.List; +import java.util.Stack; + +public class LightningTreeBuilder extends XmlTreeBuilder { + + public static class ElementWithPosition extends Element { + + private int startPosition = -1; + private int endPosition = -1; + private Location start = new Location(-1, -1); + private Location end = new Location(-1, -1); + private List raw; + + private ElementWithPosition(Tag tag, String baseUri, Attributes attributes) { + super(tag, baseUri, attributes); + } + + public HTMLElement toHTMLElement() { + return HTMLElement.builder().element(this).raw(this.raw).range(new Range(start, end)).build(); + } + } + + private final List raw; + private final String html; + private final Stack skipStack = new Stack<>(); + + public LightningTreeBuilder(List raw) { + this.raw = raw; + this.html = StringUtils.collectionToDelimitedString(raw, System.lineSeparator()); + } + + @Override + protected boolean process(Token token) { + Token.TokenType type = token.type; + + if (type == Token.TokenType.StartTag) { + insert(token.asStartTag()); + } else if (type == Token.TokenType.EndTag){ + popStackToClose(token.asEndTag()); + } else { + super.process(token); + } + + return true; + } + + private void popStackToClose(Token.EndTag endTag) { + String elName = settings.normalizeTag(endTag.tagName); + Element firstFound = null; + + if (skipStack.isEmpty() == false) { + if (elName.equalsIgnoreCase("script")) { + skipStack.pop(); + } + return; + } + + for (int pos = stack.size() -1; pos >= 0; pos--) { + Element next = stack.get(pos); + if (next.nodeName().equals(elName)) { + firstFound = next; + break; + } + } + if (firstFound == null) { + Tag tag = Tag.valueOf(endTag.name(), settings); + ElementWithPosition el = new ElementWithPosition(tag, null, settings.normalizeAttributes(endTag.attributes)); + updateLocation(el, endTag); + insertNode(el); + return; // not found, skip + } + + for (int pos = stack.size() -1; pos >= 0; pos--) { + ElementWithPosition next = (ElementWithPosition)stack.get(pos); + stack.remove(pos); + updateLocation(next, endTag); + if (next == firstFound) + break; + } + } + + + Element insert(Token.StartTag startTag) { + Tag tag = Tag.valueOf(startTag.name(), settings); + if (startTag.attributes != null) + startTag.attributes.deduplicate(settings); + + String elName = settings.normalizeTag(startTag.tagName); + if (elName.equalsIgnoreCase("script") && startTag.isSelfClosing() == false) { + skipStack.push(startTag); + } + + if (skipStack.isEmpty() == false) { + return null; + } + + ElementWithPosition el = new ElementWithPosition(tag, null, settings.normalizeAttributes(startTag.attributes)); + updateLocation(el, startTag); + insertNode(el); + if (startTag.isSelfClosing()) { + if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output. see above. + tag.setSelfClosing(); + } else { + stack.add(el); + } + return el; + } + + private void updateLocation(ElementWithPosition element, Token.Tag tag) { + int position = this.reader.pos(); + + if (tag instanceof Token.StartTag) { + int marker = this.html.lastIndexOf(tag.name(), position); + int startTagSymbol = this.html.lastIndexOf("<", marker); + + element.startPosition = startTagSymbol; + element.start = convertPositionToLocation(element.startPosition); + } + + if (tag instanceof Token.EndTag || tag.isSelfClosing()) { + element.endPosition = position; + element.end = convertPositionToLocation(element.endPosition); + + if (element.startPosition == -1) { + updateLocation(element, new Token.StartTag().name(settings.normalizeTag(tag.tagName))); + } + + element.raw = Lists.newArrayList( + this.html.substring(element.startPosition, element.endPosition).split(System.lineSeparator())); + } + } + + private Location convertPositionToLocation(int position) { + int line = 0; + int col = 0; + + while (position > 0) { + int lineLength = this.raw.get(line).length(); + + if (lineLength >= position) { + col = position; + } else { + line++; + position -= System.lineSeparator().length(); + } + + position -= lineLength; + } + + return new Location(line, col); + } + + private void insertNode(Node node) { + currentElement().appendChild(node); + } +} diff --git a/core/src/test/java/com/salesforce/slds/shared/parsers/MarkupParserTest.java b/core/src/test/java/com/salesforce/slds/shared/parsers/MarkupParserTest.java index 88e33d9..4dacf0a 100644 --- a/core/src/test/java/com/salesforce/slds/shared/parsers/MarkupParserTest.java +++ b/core/src/test/java/com/salesforce/slds/shared/parsers/MarkupParserTest.java @@ -235,4 +235,27 @@ void issue8() { assertThat(elements.size(), Matchers.is(1)); } } + + + @Test + void brTagWithNoTrailingTagsOrText() { + List html = Lists.newArrayList("