Skip to content

Commit

Permalink
Complete adoption agency algorithm (#1517)
Browse files Browse the repository at this point in the history
Follow adoption agency algorithm
  • Loading branch information
suarez12138 authored Jul 9, 2021
1 parent f49f92c commit e45e53c
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 5 deletions.
19 changes: 18 additions & 1 deletion src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,14 @@ Element lastFormattingElement() {
return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
}

/**
 * Locates an element in the list of active formatting elements.
 * Matching is by reference identity ({@code ==}), not by {@code equals}.
 *
 * @param el the element instance to look for
 * @return the index of the first entry that is this exact instance, or -1 if there is none
 */
int positionOfElement(Element el) {
    final int count = formattingElements.size();
    int idx = 0;
    while (idx < count) {
        if (formattingElements.get(idx) == el) {
            return idx;
        }
        idx++;
    }
    return -1;
}

Element removeLastFormattingElement() {
int size = formattingElements.size();
if (size > 0)
Expand All @@ -623,6 +631,16 @@ Element removeLastFormattingElement() {

// active formatting elements
/**
 * Appends an element to the end of the list of active formatting elements,
 * after first running {@link #checkActiveFormattingElements(Element)} for it.
 *
 * @param in the element to append
 */
void pushActiveFormattingElements(Element in) {
    // The check runs first; it inspects the list as it stands before the new entry is added.
    checkActiveFormattingElements(in);
    formattingElements.add(in);
}

/**
 * Inserts an element into the list of active formatting elements at a bookmarked position,
 * after first running {@link #checkActiveFormattingElements(Element)} for it.
 * <p>
 * The bookmark is computed by the caller ahead of time, and the list may have changed since
 * (entries removed by the caller or by the pre-insertion check). If the bookmark is no longer a
 * valid insertion index (negative, or greater than the current list size), the element is
 * appended to the end instead of throwing {@link IndexOutOfBoundsException}.
 *
 * @param in the element to insert
 * @param bookmark the desired insertion index in the list of active formatting elements
 */
void pushWithBookmark(Element in, int bookmark) {
    checkActiveFormattingElements(in);

    // Read the size only after the check above, so the bounds test reflects the list as it is now.
    final int size = formattingElements.size();
    if (bookmark < 0 || bookmark > size) {
        // Stale or unset bookmark: fall back to a plain push rather than failing the parse.
        formattingElements.add(in);
    } else {
        formattingElements.add(bookmark, in);
    }
}

void checkActiveFormattingElements(Element in){
int numSeen = 0;
for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
Element el = formattingElements.get(pos);
Expand All @@ -637,7 +655,6 @@ void pushActiveFormattingElements(Element in) {
break;
}
}
formattingElements.add(in);
}

private boolean isSameFormattingElement(Element a, Element b) {
Expand Down
11 changes: 7 additions & 4 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -806,11 +806,14 @@ else if (!tb.onStack(formatEl)) {
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
// run-aways
final int stackSize = stack.size();
int bookmark = -1;
for (int si = 0; si < stackSize && si < 64; si++) {
el = stack.get(si);
if (el == formatEl) {
commonAncestor = stack.get(si - 1);
seenFormattingElement = true;
// Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
bookmark = tb.positionOfElement(el);
} else if (seenFormattingElement && tb.isSpecial(el)) {
furthestBlock = el;
break;
Expand All @@ -822,8 +825,6 @@ else if (!tb.onStack(formatEl)) {
return true;
}

// todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
// does that mean: int pos of format el in list?
Element node = furthestBlock;
Element lastNode = furthestBlock;
for (int j = 0; j < 3; j++) {
Expand All @@ -843,8 +844,9 @@ else if (!tb.onStack(formatEl)) {

//noinspection StatementWithEmptyBody
if (lastNode == furthestBlock) {
// todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
// move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements,
// i.e. one index past that node's current position in the list.
bookmark = tb.positionOfElement(node) + 1;
}
if (lastNode.parent() != null)
lastNode.remove();
Expand All @@ -871,7 +873,8 @@ else if (!tb.onStack(formatEl)) {
}
furthestBlock.appendChild(adopter);
tb.removeFromActiveFormattingElements(formatEl);
// todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
// insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
tb.pushWithBookmark(adopter, bookmark);
tb.removeFromStack(formatEl);
tb.insertOnStackAfter(furthestBlock, adopter);
}
Expand Down
51 changes: 51 additions & 0 deletions src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.parser.HtmlTreeBuilderState.Constants;
import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -46,4 +47,54 @@ public void ensureArraysAreSorted() {
assertEquals(38, constants.size());
}


/**
 * Parses an anchor that wraps a div which itself contains a second anchor, and checks the
 * serialized document: the outer anchor is closed before the div and re-opened inside it,
 * ahead of the inner anchor.
 */
@Test
public void nestedAnchorElements01() {
    final String input = "<html>\n" +
        " <body>\n" +
        " <a href='#1'>\n" +
        " <div>\n" +
        " <a href='#2'>child</a>\n" +
        " </div>\n" +
        " </a>\n" +
        " </body>\n" +
        "</html>";

    final String expected = "<html> \n" +
        " <head></head>\n" +
        " <body> <a href=\"#1\"> </a>\n" +
        " <div>\n" +
        " <a href=\"#1\"> </a><a href=\"#2\">child</a> \n" +
        " </div> \n" +
        " </body>\n" +
        "</html>";

    final String actual = Jsoup.parse(input).toString();
    assertEquals(expected, actual);
}

/**
 * Same scenario as the single-div case, but with the inner anchor nested two divs deep:
 * the expected output re-opens the outer anchor at each div level before the inner anchor.
 */
@Test
public void nestedAnchorElements02() {
    final String input = "<html>\n" +
        " <body>\n" +
        " <a href='#1'>\n" +
        " <div>\n" +
        " <div>\n" +
        " <a href='#2'>child</a>\n" +
        " </div>\n" +
        " </div>\n" +
        " </a>\n" +
        " </body>\n" +
        "</html>";

    final String expected = "<html> \n" +
        " <head></head>\n" +
        " <body> <a href=\"#1\"> </a>\n" +
        " <div>\n" +
        " <a href=\"#1\"> </a>\n" +
        " <div>\n" +
        " <a href=\"#1\"> </a><a href=\"#2\">child</a> \n" +
        " </div> \n" +
        " </div> \n" +
        " </body>\n" +
        "</html>";

    final String actual = Jsoup.parse(input).toString();
    assertEquals(expected, actual);
}

}

0 comments on commit e45e53c

Please sign in to comment.