From a16a173b4291a2335ab53afaa21aaff1f33d5fdd Mon Sep 17 00:00:00 2001 From: Matt Coley Date: Thu, 19 Aug 2021 14:20:57 -0400 Subject: [PATCH 1/2] Fix striptags to clean HTML instead of parsing --- .../jinjava/lib/filter/StripTagsFilter.java | 10 ++++++++-- .../jinjava/lib/filter/StripTagsFilterTest.java | 14 +++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java b/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java index cfc467738..083c0e9b8 100644 --- a/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java +++ b/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java @@ -6,6 +6,11 @@ import com.hubspot.jinjava.interpret.JinjavaInterpreter; import java.util.regex.Pattern; import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Document.OutputSettings; +import org.jsoup.nodes.Entities.EscapeMode; +import org.jsoup.parser.Parser; +import org.jsoup.safety.Whitelist; /** * striptags(value) Strip SGML/XML tags and replace adjacent whitespace by one space. @@ -34,8 +39,9 @@ public Object filter(Object object, JinjavaInterpreter interpreter, String... 
ar } String val = interpreter.renderFlat((String) object); - String strippedVal = Jsoup.parseBodyFragment(val).text(); - String normalizedVal = WHITESPACE.matcher(strippedVal).replaceAll(" "); + String cleanedVal = Jsoup.clean(val, Whitelist.none()); + + String normalizedVal = WHITESPACE.matcher(cleanedVal).replaceAll(" "); return normalizedVal; } diff --git a/src/test/java/com/hubspot/jinjava/lib/filter/StripTagsFilterTest.java b/src/test/java/com/hubspot/jinjava/lib/filter/StripTagsFilterTest.java index 64b1726fb..2ea987f78 100644 --- a/src/test/java/com/hubspot/jinjava/lib/filter/StripTagsFilterTest.java +++ b/src/test/java/com/hubspot/jinjava/lib/filter/StripTagsFilterTest.java @@ -37,7 +37,7 @@ public void itPassesThruNonStringVals() throws Exception { @Test public void itWorksWithNonHtmlStrings() throws Exception { assertThat(filter.filter("foo", interpreter)).isEqualTo("foo"); - assertThat(filter.filter("foo < bar", interpreter)).isEqualTo("foo < bar"); + assertThat(filter.filter("foo < bar", interpreter)).isEqualTo("foo &lt; bar"); } @Test @@ -51,4 +51,16 @@ public void itStripsTagsFromHtml() throws Exception { assertThat(filter.filter("foo bar other", interpreter)) .isEqualTo("foo bar other"); } + + @Test + public void itStripsTagsFromNestedHtml() throws Exception { + assertThat(filter.filter("
test
", interpreter)) + .isEqualTo("test"); + } + + @Test + public void itStripsTagsFromEscapedHtml() throws Exception { + assertThat(filter.filter("&lt;div&gt;test&lt;/test&gt;", interpreter)) + .isEqualTo("&lt;div&gt;test&lt;/test&gt;"); + } } From 5874e7f5430ee367a7d89237f903ec11d7c82c5d Mon Sep 17 00:00:00 2001 From: Matt Coley Date: Thu, 19 Aug 2021 14:22:24 -0400 Subject: [PATCH 2/2] remove imports --- .../java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java b/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java index 083c0e9b8..72fbf7efc 100644 --- a/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java +++ b/src/main/java/com/hubspot/jinjava/lib/filter/StripTagsFilter.java @@ -6,10 +6,6 @@ import com.hubspot.jinjava.interpret.JinjavaInterpreter; import java.util.regex.Pattern; import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Document.OutputSettings; -import org.jsoup.nodes.Entities.EscapeMode; -import org.jsoup.parser.Parser; import org.jsoup.safety.Whitelist; /**