> {
/**
* Get a cookie value by name from this request/response.
- *
- * Response objects have a simplified cookie model. Each cookie set in the response is added to the response
- * object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
- *
* @param name name of cookie to retrieve.
* @return value of cookie, or null if not set
*/
@@ -638,6 +634,7 @@ interface Base> {
/**
* Retrieve all of the request/response cookies as a map
* @return cookies
+ * @see #cookieStore()
*/
Map cookies();
}
diff --git a/src/main/java/org/jsoup/helper/CookieUtil.java b/src/main/java/org/jsoup/helper/CookieUtil.java
index f375003753..218e935efd 100644
--- a/src/main/java/org/jsoup/helper/CookieUtil.java
+++ b/src/main/java/org/jsoup/helper/CookieUtil.java
@@ -4,6 +4,8 @@
import org.jsoup.internal.StringUtil;
import java.io.IOException;
+import java.net.CookieManager;
+import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
@@ -83,8 +85,21 @@ static URI asUri(URL url) throws IOException {
}
}
- static void storeCookies(HttpConnection.Request req, URL url, Map> resHeaders) throws IOException {
- req.cookieManager().put(CookieUtil.asUri(url), resHeaders); // stores cookies for session
+    /**
+     * Stores the cookies carried by the response headers into the request's cookie manager, then copies every cookie
+     * the manager returns for this URL into the response's simple cookie(name, value) map.
+     *
+     * @param req the request; supplies the cookie manager
+     * @param res the response that receives the name/value pairs
+     * @param url the URL that was fetched
+     * @param resHeaders the response headers, handed to the cookie manager as-is
+     * @throws IOException declared by the URL-to-URI conversion and by the cookie manager calls
+     */
+    static void storeCookies(HttpConnection.Request req, HttpConnection.Response res, URL url, Map<String, List<String>> resHeaders) throws IOException {
+        CookieManager manager = req.cookieManager();
+        URI uri = CookieUtil.asUri(url);
+        manager.put(uri, resHeaders); // stores cookies for session
+        // set up the simple cookie(name, value) map:
+        // get cookies for url; may have been set on this or earlier requests. the headers here are ignored other than a null check
+        Map<String, List<String>> cookieMap = manager.get(uri, resHeaders);
+        for (List<String> values : cookieMap.values()) {
+            for (String headerVal : values) {
+                for (HttpCookie cookie : HttpCookie.parse(headerVal)) {
+                    res.cookie(cookie.getName(), cookie.getValue());
+                }
+            }
+        }
}
}
diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java
index 425d77d0cf..712019108b 100644
--- a/src/main/java/org/jsoup/helper/HttpConnection.java
+++ b/src/main/java/org/jsoup/helper/HttpConnection.java
@@ -1133,14 +1133,9 @@ private Response(HttpURLConnection conn, HttpConnection.Request request, HttpCon
Map> resHeaders = createHeaderMap(conn);
processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
- CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store
+ CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store
if (previousResponse != null) { // was redirected
- // map previous response cookies into this response cookies() object
- for (Map.Entry prevCookie : previousResponse.cookies().entrySet()) {
- if (!hasCookie(prevCookie.getKey()))
- cookie(prevCookie.getKey(), prevCookie.getValue());
- }
previousResponse.safeClose();
// enforce too many redirects:
@@ -1176,19 +1171,6 @@ void processResponseHeaders(Map> resHeaders) {
continue; // http/1.1 line
List values = entry.getValue();
- if (name.equalsIgnoreCase("Set-Cookie")) {
- for (String value : values) {
- if (value == null)
- continue;
- TokenQueue cd = new TokenQueue(value);
- String cookieName = cd.chompTo("=").trim();
- String cookieVal = cd.consumeTo(";").trim();
- // ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required
- // name not blank, value not null
- if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first
- cookie(cookieName, cookieVal);
- }
- }
for (String value : values) {
addHeader(name, fixHeaderEncoding(value));
}
diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
index f1b2d7b239..470a785a50 100644
--- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
+++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
@@ -25,18 +25,19 @@ enum HtmlTreeBuilderState {
tb.insertCommentNode(t.asComment());
} else if (t.isDoctype()) {
// todo: parse error check on expected doctypes
- // todo: quirk state check on doctype ids
Token.Doctype d = t.asDoctype();
DocumentType doctype = new DocumentType(
tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier());
doctype.setPubSysKey(d.getPubSysKey());
tb.getDocument().appendChild(doctype);
tb.onNodeInserted(doctype);
- if (d.isForceQuirks())
+ // todo: quirk state check on more doctype ids, if deemed useful (most are ancient legacy and presumably irrelevant)
+ if (d.isForceQuirks() || !doctype.name().equals("html") || doctype.publicId().equalsIgnoreCase("HTML"))
tb.getDocument().quirksMode(Document.QuirksMode.quirks);
tb.transition(BeforeHtml);
} else {
// todo: check not iframe srcdoc
+ tb.getDocument().quirksMode(Document.QuirksMode.quirks); // missing doctype
tb.transition(BeforeHtml);
return tb.process(t); // re-process token
}
diff --git a/src/main/java/org/jsoup/select/StructuralEvaluator.java b/src/main/java/org/jsoup/select/StructuralEvaluator.java
index 1e01ed03f1..c64eab3ac0 100644
--- a/src/main/java/org/jsoup/select/StructuralEvaluator.java
+++ b/src/main/java/org/jsoup/select/StructuralEvaluator.java
@@ -1,6 +1,7 @@
package org.jsoup.select;
import org.jsoup.internal.Functions;
+import org.jsoup.internal.SoftPool;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.NodeIterator;
@@ -51,8 +52,8 @@ public boolean matches(Element root, Element element) {
}
static class Has extends StructuralEvaluator {
- static final ThreadLocal> ThreadElementIter =
- ThreadLocal.withInitial(() -> new NodeIterator<>(new Element("html"), Element.class));
+ static final SoftPool> ElementIterPool =
+ new SoftPool<>(() -> new NodeIterator<>(new Element("html"), Element.class));
// the element here is just a placeholder so this can be final - gets set in restart()
private final boolean checkSiblings; // evaluating against siblings (or children)
@@ -69,16 +70,20 @@ public Has(Evaluator evaluator) {
return true;
}
}
- } else {
- // otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj
- NodeIterator it = ThreadElementIter.get();
- it.restart(element);
+ }
+ // otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj
+ NodeIterator it = ElementIterPool.borrow();
+ it.restart(element);
+ try {
while (it.hasNext()) {
Element el = it.next();
if (el == element) continue; // don't match self, only descendants
- if (evaluator.matches(element, el))
+ if (evaluator.matches(element, el)) {
return true;
+ }
}
+ } finally {
+ ElementIterPool.release(it);
}
return false;
}
diff --git a/src/test/java/org/jsoup/helper/HttpConnectionTest.java b/src/test/java/org/jsoup/helper/HttpConnectionTest.java
index 833fead486..7162e7f05b 100644
--- a/src/test/java/org/jsoup/helper/HttpConnectionTest.java
+++ b/src/test/java/org/jsoup/helper/HttpConnectionTest.java
@@ -155,27 +155,6 @@ public void caseInsensitiveHeaders(Locale locale) {
assertEquals(0, res.cookies().size());
}
- @Test public void ignoresEmptyCookieNameAndVals() {
- // prep http response header map
- Map> headers = new HashMap<>();
- List cookieStrings = new ArrayList<>();
- cookieStrings.add(null);
- cookieStrings.add("");
- cookieStrings.add("one");
- cookieStrings.add("two=");
- cookieStrings.add("three=;");
- cookieStrings.add("four=data; Domain=.example.com; Path=/");
-
- headers.put("Set-Cookie", cookieStrings);
- HttpConnection.Response res = new HttpConnection.Response();
- res.processResponseHeaders(headers);
- assertEquals(4, res.cookies().size());
- assertEquals("", res.cookie("one"));
- assertEquals("", res.cookie("two"));
- assertEquals("", res.cookie("three"));
- assertEquals("data", res.cookie("four"));
- }
-
@Test public void connectWithUrl() throws MalformedURLException {
Connection con = HttpConnection.connect(new URL("http://example.com"));
assertEquals("http://example.com", con.request().url().toExternalForm());
diff --git a/src/test/java/org/jsoup/helper/W3CDomTest.java b/src/test/java/org/jsoup/helper/W3CDomTest.java
index 7e1de61547..f9c39a4d54 100644
--- a/src/test/java/org/jsoup/helper/W3CDomTest.java
+++ b/src/test/java/org/jsoup/helper/W3CDomTest.java
@@ -387,4 +387,33 @@ public void canOutputHtmlWithoutNamespace() {
assertEquals("Foo", doc.getFirstChild().getTextContent());
}
+    /**
+     * Converts a jsoup-parsed document to a W3C DOM and checks that both img elements found in the body carry
+     * exactly two attributes, readable as {@code src} and {@code alt}.
+     */
+    @Test void testHtmlParseAttributesAreCaseInsensitive() throws IOException {
+        // https://github.com/jhy/jsoup/issues/981
+        // NOTE(review): the markup inside this literal looks like it was stripped in extraction (only the newlines
+        // remain) - restore it from the upstream test before relying on the assertions below.
+        String html = "\n" +
+            "\n" +
+            "\n" +
+            "\n" +
+            "\n" +
+            "";
+        org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(html);
+        org.jsoup.helper.W3CDom jDom = new org.jsoup.helper.W3CDom();
+        Document doc = jDom.fromJsoup(jsoupDoc);
+        org.w3c.dom.Element body = (org.w3c.dom.Element) doc.getDocumentElement().getElementsByTagName("body").item(0);
+        NodeList imgs = body.getElementsByTagName("img");
+        assertEquals(2, imgs.getLength());
+
+        // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly
+        org.w3c.dom.Element first = (org.w3c.dom.Element) imgs.item(0);
+        assertEquals(2, first.getAttributes().getLength());
+        assertEquals("firstImage.jpg", first.getAttribute("src"));
+        assertEquals("Alt one", first.getAttribute("alt"));
+
+        org.w3c.dom.Element second = (org.w3c.dom.Element) imgs.item(1);
+        assertEquals(2, second.getAttributes().getLength());
+        assertEquals("secondImage.jpg", second.getAttribute("src"));
+        assertEquals("Alt two", second.getAttribute("alt"));
+    }
+
}
diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java
index 3012828085..6042f81c64 100644
--- a/src/test/java/org/jsoup/integration/ConnectTest.java
+++ b/src/test/java/org/jsoup/integration/ConnectTest.java
@@ -19,6 +19,7 @@
import org.jsoup.parser.StreamParser;
import org.jsoup.parser.XmlTreeBuilder;
import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
@@ -60,6 +61,12 @@ public static void setUp() {
echoUrl = EchoServlet.Url;
}
+ @BeforeEach
+ public void emptyCookieJar() { // JUnit runs this before every test in the class
+ // empty the cookie jar, so cookie tests are independent. presumably the store reached through this connection is the one the tests share - TODO confirm
+ Jsoup.connect("http://example.com").cookieStore().removeAll();
+ }
+
@Test
public void canConnectToLocalServer() throws IOException {
String url = HelloServlet.Url;
@@ -427,7 +434,7 @@ public void multiCookieSet() throws IOException {
// test cookies set by redirect:
Map cookies = res.cookies();
assertEquals("asdfg123", cookies.get("token"));
- assertEquals("jhy", cookies.get("uid"));
+ assertEquals("jhy", cookies.get("uid")); // two uids set, order dependent
// send those cookies into the echo URL by map:
Document doc = Jsoup.connect(echoUrl).cookies(cookies).get();
diff --git a/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java b/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java
index 0a937b772f..41243c36aa 100644
--- a/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java
+++ b/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java
@@ -33,7 +33,8 @@ protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOEx
if (req.getParameter(SetCookiesParam) != null) {
res.addCookie(new Cookie("token", "asdfg123"));
- res.addCookie(new Cookie("uid", "jhy"));
+ res.addCookie(new Cookie("uid", "foobar"));
+ res.addCookie(new Cookie("uid", "jhy")); // dupe, should use latter
}
res.setHeader("Location", location);
diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java
index be566d3654..2c2fd50d9b 100644
--- a/src/test/java/org/jsoup/nodes/ElementTest.java
+++ b/src/test/java/org/jsoup/nodes/ElementTest.java
@@ -26,6 +26,7 @@
import java.util.regex.Pattern;
import java.util.stream.Stream;
+import static org.jsoup.select.SelectorTest.assertSelectedOwnText;
import static org.junit.jupiter.api.Assertions.*;
/**
@@ -2636,7 +2637,7 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals(selected.first(), div);
}
- @Test void cssSelectorWithAstrix() {
+ @Test void cssSelectorWithAsterisk() {
// https://github.com/jhy/jsoup/issues/2169
Document doc = Jsoup.parse("One
Two
");
Element div = doc.expectFirst("div");
@@ -2648,6 +2649,16 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals(selected.first(), div);
}
+ @Test void cssSelectorWithPipe() { // round trip: the selector generated for an element must select that element again
+ // https://github.com/jhy/jsoup/issues/1998 - NOTE(review): the HTML literal below looks mangled by extraction (markup missing, literal split over two lines); restore from upstream
+ Document doc = Jsoup.parse("One
");
+ Element span = doc.expectFirst("div span");
+ String selector = span.cssSelector();
+ assertEquals("html > body > div > span.\\|", selector); // expected selector carries a backslash-escaped pipe after the class dot
+ Elements selected = doc.select(selector); // the generated selector is fed straight back into select()
+ assertSelectedOwnText(selected, "One");
+ }
+
@Test void orphanSiblings() {
Element el = new Element("div");
assertEquals(0, el.siblingElements().size());
diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java
index 7fa7a67a59..a67003a839 100644
--- a/src/test/java/org/jsoup/parser/HtmlParserTest.java
+++ b/src/test/java/org/jsoup/parser/HtmlParserTest.java
@@ -1888,4 +1888,24 @@ private static void assertMathNamespace(Element el) {
img.ownerDocument().outputSettings().charset("ascii");
assertEquals("", img.outerHtml());
}
+
+ @Test void tableInPInQuirksMode() { // parses two inputs and checks both the reported quirks mode and the resulting body HTML
+ // https://github.com/jhy/jsoup/issues/2197 - NOTE(review): the HTML literals in this test look mangled by extraction (markup missing, literals split over lines); restore from upstream
+ String html = "
";
+ Document doc = Jsoup.parse(html);
+ assertEquals(Document.QuirksMode.quirks, doc.quirksMode()); // presumably this input has no doctype - confirm once the literal is restored
+ assertEquals(
+ "
", // quirks, allows table in p
+ TextUtil.normalizeSpaces(doc.body().html())
+ );
+
+ // doctype set, no quirks
+ html ="
";
+ doc = Jsoup.parse(html);
+ assertEquals(Document.QuirksMode.noQuirks, doc.quirksMode()); // second parse must report no-quirks mode
+ assertEquals(
+ "
", // no quirks, p gets closed
+ TextUtil.normalizeSpaces(doc.body().html())
+ );
+ }
}
diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java
index d296e1a2b7..0ae4048e3f 100644
--- a/src/test/java/org/jsoup/select/SelectorTest.java
+++ b/src/test/java/org/jsoup/select/SelectorTest.java
@@ -28,7 +28,7 @@ public static void assertSelectedIds(Elements els, String... ids) {
}
}
- static void assertSelectedOwnText(Elements els, String... ownTexts) {
+ public static void assertSelectedOwnText(Elements els, String... ownTexts) {
assertNotNull(els);
assertEquals(ownTexts.length, els.size(), "Incorrect number of selected elements");
for (int i = 0; i < ownTexts.length; i++) {
@@ -1296,4 +1296,51 @@ public void emptyPseudo() {
Elements emptyAttr = doc.select("p:not([*])");
assertSelectedOwnText(emptyAttr, "Three");
}
+
+    /** Selects with {@code div:has(span + a)} and expects exactly one match, whose normal name is {@code div}. */
+    @Test void divHasSpanPreceding() {
+        // https://github.com/jhy/jsoup/issues/2187
+        final String html = "";
+        final String query = "div:has(span + a)";
+
+        Elements matched = Jsoup.parse(html).select(query);
+        assertEquals(1, matched.size());
+        assertEquals("div", matched.first().normalName());
+    }
+
+ @Test void divHasDivPreceding() { // :has() with a child combinator followed by an adjacent-sibling combinator
+ // https://github.com/jhy/jsoup/issues/2131 - NOTE(review): the HTML literal below looks mangled by extraction (markup missing, literal split over lines); restore from upstream
+ String html = "\n" +
+ "
hello
\n" +
+ "
there
\n" +
+ "\n" +
+ "
";
+
+ String q = "div:has(>div + div)";
+
+ Document doc = Jsoup.parse(html);
+ Elements els = doc.select(q);
+ assertEquals(1, els.size()); // exactly one element may match
+ assertEquals("div", els.first().normalName());
+ assertEquals("1", els.first().id()); // and it must be the element with id 1
+ }
+
+ @Test void nestedMultiHas() { // :has() nested inside :has(), on both sides of an adjacent-sibling combinator
+ // https://github.com/jhy/jsoup/issues/2131 - NOTE(review): the HTML literal below looks mangled by extraction (markup missing, literal split over lines); restore from upstream
+ String html =
+ "" +
+ "" +
+ "" +
+ "" +
+ "
hello
" +
+ "
world
" +
+ "
" +
+ "";
+ Document document = Jsoup.parse(html);
+
+ String q = "div:has(> div:has(> span) + div:has(> span))";
+ Elements els = document.select(q);
+ assertEquals(1, els.size()); // exactly one element may match
+ assertEquals("o", els.get(0).id()); // and it must be the element with id "o"
+ }
}