Skip to content

Commit

Permalink
Merge pull request #733 from HubSpot/clean-html
Browse files Browse the repository at this point in the history
Fix striptags to clean HTML instead of parsing
  • Loading branch information
mattcoley authored Aug 19, 2021
2 parents 726b686 + 5874e7f commit 1c1ac14
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.hubspot.jinjava.interpret.JinjavaInterpreter;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

/**
* striptags(value) Strip SGML/XML tags and replace adjacent whitespace by one space.
Expand Down Expand Up @@ -34,8 +35,9 @@ public Object filter(Object object, JinjavaInterpreter interpreter, String... ar
}

String val = interpreter.renderFlat((String) object);
String strippedVal = Jsoup.parseBodyFragment(val).text();
String normalizedVal = WHITESPACE.matcher(strippedVal).replaceAll(" ");
String cleanedVal = Jsoup.clean(val, Whitelist.none());

String normalizedVal = WHITESPACE.matcher(cleanedVal).replaceAll(" ");

return normalizedVal;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public void itPassesThruNonStringVals() throws Exception {
@Test
public void itWorksWithNonHtmlStrings() throws Exception {
assertThat(filter.filter("foo", interpreter)).isEqualTo("foo");
assertThat(filter.filter("foo < bar", interpreter)).isEqualTo("foo < bar");
assertThat(filter.filter("foo < bar", interpreter)).isEqualTo("foo &lt; bar");
}

@Test
Expand All @@ -51,4 +51,16 @@ public void itStripsTagsFromHtml() throws Exception {
assertThat(filter.filter("foo <b>bar</b> other", interpreter))
.isEqualTo("foo bar other");
}

@Test
public void itStripsTagsFromNestedHtml() throws Exception {
  // Both the outer <div> and the inner <strong> wrapper must be removed,
  // leaving only the innermost text.
  String nestedMarkup = "<div><strong>test</strong></div>";
  Object stripped = filter.filter(nestedMarkup, interpreter);

  assertThat(stripped).isEqualTo("test");
}

@Test
public void itStripsTagsFromEscapedHtml() throws Exception {
  // Entity-escaped markup contains no literal tags, so the filter is expected
  // to hand it back unchanged, still escaped.
  // NOTE(review): the closing entity spells </test> rather than </div>;
  // presumably a typo. It does not affect this assertion - confirm before changing.
  String escapedMarkup = "&lt;div&gt;test&lt;/test&gt;";
  Object result = filter.filter(escapedMarkup, interpreter);

  assertThat(result).isEqualTo("&lt;div&gt;test&lt;/test&gt;");
}
}

0 comments on commit 1c1ac14

Please sign in to comment.