Skip to content

Commit

Permalink
Merge branch 'master' into Element-stream
Browse files Browse the repository at this point in the history
  • Loading branch information
Isira-Seneviratne authored Oct 3, 2024
2 parents 666c27a + b6bd4b4 commit 2fde33f
Show file tree
Hide file tree
Showing 14 changed files with 170 additions and 67 deletions.
11 changes: 10 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@
* `Element.cssSelector()` would fail if the element's class contained a `*`
character. [2169](https://github.com/jhy/jsoup/issues/2169)
* When tracking source ranges, a text node following an invalid self-closing element may be left
untracked.[2175](https://github.com/jhy/jsoup/issues/2175)
untracked. [2175](https://github.com/jhy/jsoup/issues/2175)
* When a document has no doctype, or a doctype not named `html`, it should be parsed in Quirks
Mode. [2197](https://github.com/jhy/jsoup/issues/2197)
* With a selector like `div:has(span + a)`, the `has()` component was not working correctly, as the inner combining
query caused the evaluator to match the inner elements against the outer element's siblings, not its
children. [2187](https://github.com/jhy/jsoup/issues/2187)
* A selector query that included multiple `:has()` components in a nested `:has()` might incorrectly
execute. [2131](https://github.com/jhy/jsoup/issues/2131)
* Updated the simple view of cookies available via `Connection.Response#cookies()` to reflect the contents of the
current cookie jar for the current URL. [1831](https://github.com/jhy/jsoup/issues/1831)

## 1.18.1 (2024-Jul-10)

Expand Down
14 changes: 7 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jetty.version>9.4.55.v20240627</jetty.version>
<jetty.version>9.4.56.v20240826</jetty.version>
</properties>

<build>
Expand Down Expand Up @@ -118,7 +118,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.8.0</version>
<version>3.10.0</version>
<configuration>
<doclint>none</doclint>
<source>8</source>
Expand Down Expand Up @@ -203,15 +203,15 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.3.1</version>
<version>3.5.0</version>
<configuration>
<!-- smaller stack to find stack overflows. Was 256, but Zulu on MacOS ARM needs >= 640 -->
<argLine>-Xss640k</argLine>
</configuration>
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>3.3.1</version>
<version>3.5.0</version>
<executions>
<execution>
<goals>
Expand All @@ -229,7 +229,7 @@
<!-- API version compat check - https://siom79.github.io/japicmp/ -->
<groupId>com.github.siom79.japicmp</groupId>
<artifactId>japicmp-maven-plugin</artifactId>
<version>0.22.0</version>
<version>0.23.0</version>
<configuration>
<!-- hard code previous version; can't detect when running stateless on build server -->
<oldVersion>
Expand Down Expand Up @@ -372,7 +372,7 @@
<plugins>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>3.3.1</version>
<version>3.5.0</version>
<executions>
<execution>
<goals>
Expand All @@ -393,7 +393,7 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.10.3</version>
<version>5.11.0</version>
<scope>test</scope>
</dependency>

Expand Down
5 changes: 1 addition & 4 deletions src/main/java/org/jsoup/Connection.java
Original file line number Diff line number Diff line change
Expand Up @@ -604,10 +604,6 @@ interface Base<T extends Base<T>> {

/**
* Get a cookie value by name from this request/response.
* <p>
* Response objects have a simplified cookie model. Each cookie set in the response is added to the response
* object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
* </p>
* @param name name of cookie to retrieve.
* @return value of cookie, or null if not set
*/
Expand Down Expand Up @@ -638,6 +634,7 @@ interface Base<T extends Base<T>> {
/**
* Retrieve all of the request/response cookies as a map
* @return cookies
* @see #cookieStore()
*/
Map<String, String> cookies();
}
Expand Down
19 changes: 17 additions & 2 deletions src/main/java/org/jsoup/helper/CookieUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import org.jsoup.internal.StringUtil;

import java.io.IOException;
import java.net.CookieManager;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
Expand Down Expand Up @@ -83,8 +85,21 @@ static URI asUri(URL url) throws IOException {
}
}

static void storeCookies(HttpConnection.Request req, URL url, Map<String, List<String>> resHeaders) throws IOException {
req.cookieManager().put(CookieUtil.asUri(url), resHeaders); // stores cookies for session
/**
 * Stores the cookies carried by the response headers into the request's cookie manager, then copies the cookies the
 * manager holds for the URL into the Response object's simple cookie (name, value) map.
 *
 * @param req the request whose cookie manager receives the cookies
 * @param res the response whose simple cookie map is populated
 * @param url the URL the response was received from
 * @param resHeaders the response headers
 * @throws IOException if thrown while converting the URL to a URI or by the cookie manager
 */
static void storeCookies(HttpConnection.Request req, HttpConnection.Response res, URL url, Map<String, List<String>> resHeaders) throws IOException {
    final CookieManager jar = req.cookieManager();
    final URI location = CookieUtil.asUri(url);
    jar.put(location, resHeaders); // stores cookies for session

    // Fill the simple cookie(name, value) map from the jar. The cookies for this url may have been set on this or on
    // earlier requests; the headers passed to get() are ignored other than a null check.
    final Map<String, List<String>> applicable = jar.get(location, resHeaders);
    for (List<String> headerVals : applicable.values()) {
        for (String headerVal : headerVals) {
            HttpCookie.parse(headerVal).forEach(parsed -> res.cookie(parsed.getName(), parsed.getValue()));
        }
    }
}
}
20 changes: 1 addition & 19 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -1133,14 +1133,9 @@ private Response(HttpURLConnection conn, HttpConnection.Request request, HttpCon

Map<String, List<String>> resHeaders = createHeaderMap(conn);
processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store
CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store

if (previousResponse != null) { // was redirected
// map previous response cookies into this response cookies() object
for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
if (!hasCookie(prevCookie.getKey()))
cookie(prevCookie.getKey(), prevCookie.getValue());
}
previousResponse.safeClose();

// enforce too many redirects:
Expand Down Expand Up @@ -1176,19 +1171,6 @@ void processResponseHeaders(Map<String, List<String>> resHeaders) {
continue; // http/1.1 line

List<String> values = entry.getValue();
if (name.equalsIgnoreCase("Set-Cookie")) {
for (String value : values) {
if (value == null)
continue;
TokenQueue cd = new TokenQueue(value);
String cookieName = cd.chompTo("=").trim();
String cookieVal = cd.consumeTo(";").trim();
// ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required
// name not blank, value not null
if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first
cookie(cookieName, cookieVal);
}
}
for (String value : values) {
addHeader(name, fixHeaderEncoding(value));
}
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,19 @@ enum HtmlTreeBuilderState {
tb.insertCommentNode(t.asComment());
} else if (t.isDoctype()) {
// todo: parse error check on expected doctypes
// todo: quirk state check on doctype ids
Token.Doctype d = t.asDoctype();
DocumentType doctype = new DocumentType(
tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier());
doctype.setPubSysKey(d.getPubSysKey());
tb.getDocument().appendChild(doctype);
tb.onNodeInserted(doctype);
if (d.isForceQuirks())
// todo: quirk state check on more doctype ids, if deemed useful (most are ancient legacy and presumably irrelevant)
if (d.isForceQuirks() || !doctype.name().equals("html") || doctype.publicId().equalsIgnoreCase("HTML"))
tb.getDocument().quirksMode(Document.QuirksMode.quirks);
tb.transition(BeforeHtml);
} else {
// todo: check not iframe srcdoc
tb.getDocument().quirksMode(Document.QuirksMode.quirks); // missing doctype
tb.transition(BeforeHtml);
return tb.process(t); // re-process token
}
Expand Down
19 changes: 12 additions & 7 deletions src/main/java/org/jsoup/select/StructuralEvaluator.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jsoup.select;

import org.jsoup.internal.Functions;
import org.jsoup.internal.SoftPool;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.NodeIterator;
Expand Down Expand Up @@ -51,8 +52,8 @@ public boolean matches(Element root, Element element) {
}

static class Has extends StructuralEvaluator {
static final ThreadLocal<NodeIterator<Element>> ThreadElementIter =
ThreadLocal.withInitial(() -> new NodeIterator<>(new Element("html"), Element.class));
static final SoftPool<NodeIterator<Element>> ElementIterPool =
new SoftPool<>(() -> new NodeIterator<>(new Element("html"), Element.class));
// the element here is just a placeholder so this can be final - gets set in restart()

private final boolean checkSiblings; // evaluating against siblings (or children)
Expand All @@ -69,16 +70,20 @@ public Has(Evaluator evaluator) {
return true;
}
}
} else {
// otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj
NodeIterator<Element> it = ThreadElementIter.get();
it.restart(element);
}
// otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj
NodeIterator<Element> it = ElementIterPool.borrow();
it.restart(element);
try {
while (it.hasNext()) {
Element el = it.next();
if (el == element) continue; // don't match self, only descendants
if (evaluator.matches(element, el))
if (evaluator.matches(element, el)) {
return true;
}
}
} finally {
ElementIterPool.release(it);
}
return false;
}
Expand Down
21 changes: 0 additions & 21 deletions src/test/java/org/jsoup/helper/HttpConnectionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,27 +155,6 @@ public void caseInsensitiveHeaders(Locale locale) {
assertEquals(0, res.cookies().size());
}

@Test public void ignoresEmptyCookieNameAndVals() {
    // Set-Cookie values to feed in: a null, a blank string, a bare name, two empty values, and one complete cookie
    List<String> setCookieValues = new ArrayList<>();
    setCookieValues.add(null);
    setCookieValues.add("");
    setCookieValues.add("one");
    setCookieValues.add("two=");
    setCookieValues.add("three=;");
    setCookieValues.add("four=data; Domain=.example.com; Path=/");

    // wrap them in a http response header map
    Map<String, List<String>> responseHeaders = new HashMap<>();
    responseHeaders.put("Set-Cookie", setCookieValues);

    HttpConnection.Response response = new HttpConnection.Response();
    response.processResponseHeaders(responseHeaders);

    // the null and the blank entries yield no cookie; the other four do, with the attributes dropped
    assertEquals(4, response.cookies().size());
    assertEquals("", response.cookie("one"));
    assertEquals("", response.cookie("two"));
    assertEquals("", response.cookie("three"));
    assertEquals("data", response.cookie("four"));
}

@Test public void connectWithUrl() throws MalformedURLException {
Connection con = HttpConnection.connect(new URL("http://example.com"));
assertEquals("http://example.com", con.request().url().toExternalForm());
Expand Down
29 changes: 29 additions & 0 deletions src/test/java/org/jsoup/helper/W3CDomTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -387,4 +387,33 @@ public void canOutputHtmlWithoutNamespace() {
assertEquals("Foo", doc.getFirstChild().getTextContent());
}

/**
 * Parses HTML whose second image uses upper- and mixed-case tag and attribute names, converts it to a W3C DOM, and
 * checks that both images and their attributes can be retrieved by their lower-case names.
 */
@Test void testHtmlParseAttributesAreCaseInsensitive() throws IOException {
    // https://github.com/jhy/jsoup/issues/981
    String html = "<html lang=en>\n" +
        "<body>\n" +
        "<img src=\"firstImage.jpg\" alt=\"Alt one\" />\n" +
        "<IMG SRC=\"secondImage.jpg\" AlT=\"Alt two\" />\n" +
        "</body>\n" +
        "</html>";
    org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(html);
    org.jsoup.helper.W3CDom jDom = new org.jsoup.helper.W3CDom();
    Document doc = jDom.fromJsoup(jsoupDoc);
    org.w3c.dom.Element body = (org.w3c.dom.Element) doc.getDocumentElement().getElementsByTagName("body").item(0);
    NodeList imgs = body.getElementsByTagName("img");
    assertEquals(2, imgs.getLength());

    // assertEquals takes (expected, actual); keeping that order makes a failure message report the values correctly
    org.w3c.dom.Element first = (org.w3c.dom.Element) imgs.item(0);
    assertEquals(2, first.getAttributes().getLength());
    String img1 = first.getAttribute("src");
    assertEquals("firstImage.jpg", img1);
    String alt1 = first.getAttribute("alt");
    assertEquals("Alt one", alt1);

    org.w3c.dom.Element second = (org.w3c.dom.Element) imgs.item(1);
    assertEquals(2, second.getAttributes().getLength());
    String img2 = second.getAttribute("src");
    assertEquals("secondImage.jpg", img2);
    String alt2 = second.getAttribute("alt");
    assertEquals("Alt two", alt2);
}

}
9 changes: 8 additions & 1 deletion src/test/java/org/jsoup/integration/ConnectTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jsoup.parser.StreamParser;
import org.jsoup.parser.XmlTreeBuilder;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
Expand Down Expand Up @@ -60,6 +61,12 @@ public static void setUp() {
echoUrl = EchoServlet.Url;
}

/**
 * Runs before each test: removes every cookie from the cookie store of a newly created connection, so that the
 * cookie tests do not depend on one another.
 * NOTE(review): this presumes that cookie store is shared with the connections the tests create - confirm.
 */
@BeforeEach
public void emptyCookieJar() {
// empty the cookie jar, so cookie tests are independent.
Jsoup.connect("http://example.com").cookieStore().removeAll();
}

@Test
public void canConnectToLocalServer() throws IOException {
String url = HelloServlet.Url;
Expand Down Expand Up @@ -427,7 +434,7 @@ public void multiCookieSet() throws IOException {
// test cookies set by redirect:
Map<String, String> cookies = res.cookies();
assertEquals("asdfg123", cookies.get("token"));
assertEquals("jhy", cookies.get("uid"));
assertEquals("jhy", cookies.get("uid")); // two uids set, order dependent

// send those cookies into the echo URL by map:
Document doc = Jsoup.connect(echoUrl).cookies(cookies).get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOEx

if (req.getParameter(SetCookiesParam) != null) {
res.addCookie(new Cookie("token", "asdfg123"));
res.addCookie(new Cookie("uid", "jhy"));
res.addCookie(new Cookie("uid", "foobar"));
res.addCookie(new Cookie("uid", "jhy")); // dupe, should use latter
}

res.setHeader("Location", location);
Expand Down
13 changes: 12 additions & 1 deletion src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.regex.Pattern;
import java.util.stream.Stream;

import static org.jsoup.select.SelectorTest.assertSelectedOwnText;
import static org.junit.jupiter.api.Assertions.*;

/**
Expand Down Expand Up @@ -2636,7 +2637,7 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals(selected.first(), div);
}

@Test void cssSelectorWithAstrix() {
@Test void cssSelectorWithAsterisk() {
// https://github.com/jhy/jsoup/issues/2169
Document doc = Jsoup.parse("<div class='vds-items_flex-end [&amp;_>_*:first-child]:vds-pt_0'>One</div><div class='vds-items_flex-end'>Two</div>");
Element div = doc.expectFirst("div");
Expand All @@ -2648,6 +2649,16 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals(selected.first(), div);
}

@Test void cssSelectorWithPipe() {
    // https://github.com/jhy/jsoup/issues/1998
    // a class name consisting of a pipe must be escaped in the generated selector, and that selector must select
    Document parsed = Jsoup.parse("<div><span class='|'>One</div>");
    String css = parsed.expectFirst("div span").cssSelector();
    assertEquals("html > body > div > span.\\|", css);

    Elements matches = parsed.select(css);
    assertSelectedOwnText(matches, "One");
}

@Test void orphanSiblings() {
Element el = new Element("div");
assertEquals(0, el.siblingElements().size());
Expand Down
20 changes: 20 additions & 0 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1888,4 +1888,24 @@ private static void assertMathNamespace(Element el) {
img.ownerDocument().outputSettings().charset("ascii");
assertEquals("<img multi=\"&#x1f4af;\" single=\"&#x1f4af;\" hexsingle=\"&#x1f4af;\">", img.outerHtml());
}

@Test void tableInPInQuirksMode() {
    // https://github.com/jhy/jsoup/issues/2197
    final String markup = "<p><span><table><tbody><tr><td><span>Hello table data</span></td></tr></tbody></table></span></p>";

    // no doctype: parsed in quirks mode, which allows the table to stay inside the p, so the markup round-trips
    Document quirksDoc = Jsoup.parse(markup);
    assertEquals(Document.QuirksMode.quirks, quirksDoc.quirksMode());
    String quirksBody = TextUtil.normalizeSpaces(quirksDoc.body().html());
    assertEquals(markup, quirksBody);

    // doctype set: no quirks, so the p gets closed before the table
    Document standardsDoc = Jsoup.parse("<!DOCTYPE html>" + markup);
    assertEquals(Document.QuirksMode.noQuirks, standardsDoc.quirksMode());
    String standardsBody = TextUtil.normalizeSpaces(standardsDoc.body().html());
    assertEquals(
        "<p><span></span></p><table><tbody><tr><td><span>Hello table data</span></td></tr></tbody></table><p></p>",
        standardsBody
    );
}
}
Loading

0 comments on commit 2fde33f

Please sign in to comment.