diff --git a/pom.xml b/pom.xml
index fa5cebd..6f119c4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
ai.preferred
venom
- 4.2.7-SNAPSHOT
+ 4.2.7
jar
${project.groupId}:${project.artifactId}
diff --git a/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java b/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java
index 7d73456..baaf3ed 100644
--- a/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java
+++ b/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java
@@ -21,12 +21,12 @@
import ai.preferred.venom.response.BaseResponse;
import ai.preferred.venom.response.Response;
import ai.preferred.venom.utils.ResponseDecompressor;
-import ai.preferred.venom.utils.UrlUtil;
import ai.preferred.venom.validator.Validator;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.apache.commons.io.IOUtils;
import org.apache.http.*;
+import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.ContentDecoder;
import org.apache.http.nio.IOControl;
@@ -43,8 +43,9 @@
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.net.URISyntaxException;
+import java.net.URI;
import java.nio.charset.UnsupportedCharsetException;
+import java.util.List;
import java.util.Set;
/**
@@ -146,29 +147,29 @@ private byte[] getContent(final HttpEntity entity) throws IOException {
* @return An instance of base response
* @throws IOException Reading http response
*/
- private BaseResponse createVenomResponse(final boolean compressed) throws IOException {
+ private BaseResponse createVenomResponse(final boolean compressed, final HttpContext context) throws IOException {
if (compressed) {
RESPONSE_DECOMPRESSOR.decompress(httpResponse);
}
+ final HttpClientContext clientContext = HttpClientContext.adapt(context);
+ final List redirectedLocations = clientContext.getRedirectLocations();
+ final String url;
+ if (redirectedLocations == null) {
+ url = request.getUrl();
+ } else {
+ url = redirectedLocations.get(redirectedLocations.size() - 1).toString();
+ }
+
final HttpEntity entity = httpResponse.getEntity();
final byte[] content = getContent(entity);
request.getDiagnostics().setSize(content.length);
final ContentType contentType = getContentType(entity);
final Header[] headers = httpResponse.getAllHeaders();
- String tryBaseUrl;
- try {
- tryBaseUrl = UrlUtil.getBaseUrl(request);
- } catch (URISyntaxException e) {
- LOGGER.warn("Could not parse base URL: " + request.getUrl());
- tryBaseUrl = request.getUrl();
- }
- final String baseUrl = tryBaseUrl;
-
return new BaseResponse(
httpResponse.getStatusLine().getStatusCode(),
- baseUrl,
+ url,
content,
contentType,
headers,
@@ -253,7 +254,7 @@ protected final BaseResponse buildResult(final HttpContext context) throws Excep
throw new StopCodeException(statusCode, "Stop code received.");
}
- final BaseResponse response = createVenomResponse(compressed);
+ final BaseResponse response = createVenomResponse(compressed, context);
releaseResources();
final Validator.Status status;
diff --git a/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java b/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java
index 9c9bc55..77b437c 100644
--- a/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java
+++ b/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java
@@ -24,7 +24,6 @@
import ai.preferred.venom.storage.FileManager;
import ai.preferred.venom.storage.Record;
import ai.preferred.venom.storage.StorageException;
-import ai.preferred.venom.utils.UrlUtil;
import ai.preferred.venom.validator.EmptyContentValidator;
import ai.preferred.venom.validator.PipelineValidator;
import ai.preferred.venom.validator.StatusOkValidator;
@@ -35,7 +34,6 @@
import org.slf4j.LoggerFactory;
import javax.validation.constraints.NotNull;
-import java.net.URISyntaxException;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.Future;
@@ -145,16 +143,7 @@ public void cancelled() {
LOGGER.debug("Record found with id: {}", record.getId());
- String tryBaseUrl;
- try {
- tryBaseUrl = UrlUtil.getBaseUrl(request);
- } catch (URISyntaxException e) {
- LOGGER.warn("Could not parse base URL: " + request.getUrl());
- tryBaseUrl = request.getUrl();
- }
- final String baseUrl = tryBaseUrl;
-
- final StorageResponse response = new StorageResponse(record, baseUrl);
+ final StorageResponse response = new StorageResponse(record, request.getUrl());
final Validator.Status status = validator.isValid(Unwrappable.unwrapRequest(request), response);
if (status != Validator.Status.VALID) {
future.failed(new ValidationException(status, response, "Invalid response."));
diff --git a/src/main/java/ai/preferred/venom/response/BaseResponse.java b/src/main/java/ai/preferred/venom/response/BaseResponse.java
index a04e2f1..95cae8c 100644
--- a/src/main/java/ai/preferred/venom/response/BaseResponse.java
+++ b/src/main/java/ai/preferred/venom/response/BaseResponse.java
@@ -20,6 +20,8 @@
import org.apache.http.HttpHost;
import org.apache.http.entity.ContentType;
+import javax.validation.constraints.NotNull;
+
/**
* @author Maksim Tkachenko
* @author Truong Quoc Tuan
@@ -50,7 +52,7 @@ public class BaseResponse implements Response {
/**
* The base url of this response.
*/
- private final String baseUrl;
+ private final String url;
/**
* The proxy used to obtain response.
@@ -61,16 +63,16 @@ public class BaseResponse implements Response {
* Constructs a base response.
*
* @param statusCode Status code of the response
- * @param baseUrl Base url of the response
+ * @param url Url of the response
* @param content Content from the response
* @param contentType Content type of the response
* @param headers Headers from the response
* @param proxy Proxy used to obtain the response
*/
- public BaseResponse(final int statusCode, final String baseUrl, final byte[] content, final ContentType contentType,
+ public BaseResponse(final int statusCode, final String url, final byte[] content, final ContentType contentType,
final Header[] headers, final HttpHost proxy) {
this.statusCode = statusCode;
- this.baseUrl = baseUrl;
+ this.url = url;
this.content = content;
this.contentType = contentType;
this.headers = headers;
@@ -88,18 +90,23 @@ public final byte[] getContent() {
}
@Override
- public final ContentType getContentType() {
+ public final @NotNull ContentType getContentType() {
return contentType;
}
@Override
- public final Header[] getHeaders() {
+ public final @NotNull Header[] getHeaders() {
return headers;
}
@Override
- public final String getBaseUrl() {
- return baseUrl;
+ public final @NotNull String getUrl() {
+ return url;
+ }
+
+ @Override
+ public final @NotNull String getBaseUrl() {
+ return getUrl();
}
@Override
diff --git a/src/main/java/ai/preferred/venom/response/Response.java b/src/main/java/ai/preferred/venom/response/Response.java
index 09e0020..36f64f4 100644
--- a/src/main/java/ai/preferred/venom/response/Response.java
+++ b/src/main/java/ai/preferred/venom/response/Response.java
@@ -64,11 +64,21 @@ public interface Response {
@NotNull
Header[] getHeaders();
+ /**
+ * Returns the url used to fetch the response; if the request
+ * was redirected, this will be the final requested url.
+ *
+ * @return url of the response, or the final url after redirection
+ */
+ @NotNull
+ String getUrl();
+
/**
* Returns the base form of the url used in this request.
*
* @return stripped down version of requested url
*/
+ @Deprecated
@NotNull
String getBaseUrl();
diff --git a/src/main/java/ai/preferred/venom/response/StorageResponse.java b/src/main/java/ai/preferred/venom/response/StorageResponse.java
index eb8a16d..6cf92b2 100644
--- a/src/main/java/ai/preferred/venom/response/StorageResponse.java
+++ b/src/main/java/ai/preferred/venom/response/StorageResponse.java
@@ -21,6 +21,8 @@
import org.apache.http.HttpHost;
import org.apache.http.entity.ContentType;
+import javax.validation.constraints.NotNull;
+
/**
* @author Ween Jiann Lee
@@ -33,19 +35,19 @@ public class StorageResponse implements Response, Retrievable {
private final Record> record;
/**
- * The base url of this response.
+ * The url of this response.
*/
- private final String baseUrl;
+ private final String url;
/**
* Constructs a base response.
*
- * @param record record holding this response
- * @param baseUrl base URL of the response
+ * @param record record holding this response
+ * @param url URL of the response
*/
- public StorageResponse(final Record> record, final String baseUrl) {
+ public StorageResponse(final Record> record, final String url) {
this.record = record;
- this.baseUrl = baseUrl;
+ this.url = url;
}
@Override
@@ -59,18 +61,23 @@ public final byte[] getContent() {
}
@Override
- public final ContentType getContentType() {
+ public final @NotNull ContentType getContentType() {
return record.getContentType();
}
@Override
- public final Header[] getHeaders() {
+ public final @NotNull Header[] getHeaders() {
return record.getResponseHeaders();
}
@Override
- public final String getBaseUrl() {
- return baseUrl;
+ public final @NotNull String getUrl() {
+ return url;
+ }
+
+ @Override
+ public final @NotNull String getBaseUrl() {
+ return getUrl();
}
@Override
diff --git a/src/main/java/ai/preferred/venom/response/VResponse.java b/src/main/java/ai/preferred/venom/response/VResponse.java
index 8b013d3..38d1ca2 100644
--- a/src/main/java/ai/preferred/venom/response/VResponse.java
+++ b/src/main/java/ai/preferred/venom/response/VResponse.java
@@ -22,6 +22,7 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
+import javax.validation.constraints.NotNull;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
@@ -62,18 +63,23 @@ public final byte[] getContent() {
}
@Override
- public final ContentType getContentType() {
+ public final @NotNull ContentType getContentType() {
return getInner().getContentType();
}
@Override
- public final Header[] getHeaders() {
+ public final @NotNull Header[] getHeaders() {
return getInner().getHeaders();
}
@Override
- public final String getBaseUrl() {
- return getInner().getBaseUrl();
+ public final @NotNull String getUrl() {
+ return getInner().getUrl();
+ }
+
+ @Override
+ public final @NotNull String getBaseUrl() {
+ return getInner().getUrl();
}
@Override
@@ -110,7 +116,7 @@ public final String getHtml(final Charset charset) {
* @return jsoup document of response
*/
public final Document getJsoup() {
- return Jsoup.parse(getHtml(), getBaseUrl());
+ return Jsoup.parse(getHtml(), getUrl());
}
/**
@@ -120,7 +126,7 @@ public final Document getJsoup() {
* @return jsoup document of response
*/
public final Document getJsoup(final Charset charset) {
- return Jsoup.parse(getHtml(charset), getBaseUrl());
+ return Jsoup.parse(getHtml(charset), getUrl());
}
@Override
diff --git a/src/main/java/ai/preferred/venom/utils/UrlUtil.java b/src/main/java/ai/preferred/venom/utils/UrlUtil.java
index 7b8737f..2f61b15 100644
--- a/src/main/java/ai/preferred/venom/utils/UrlUtil.java
+++ b/src/main/java/ai/preferred/venom/utils/UrlUtil.java
@@ -42,6 +42,7 @@ private UrlUtil() {
* @return base URL string
* @throws URISyntaxException if not a proper URL
*/
+ @Deprecated
public static String getBaseUrl(final Request request) throws URISyntaxException {
final URI uri = new URI(request.getUrl());
final URI baseUri = new URI(uri.getScheme(), null, uri.getHost(), uri.getPort(), uri.getPath(), null, null);
diff --git a/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java b/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java
index eac28a3..3c45eb8 100644
--- a/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java
+++ b/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java
@@ -24,6 +24,7 @@
import ai.preferred.venom.storage.FileManager;
import ai.preferred.venom.storage.Record;
import com.github.tomakehurst.wiremock.WireMockServer;
+import com.google.common.collect.ImmutableList;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
@@ -36,6 +37,7 @@
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
@@ -91,7 +93,7 @@ public void testGet() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -114,7 +116,7 @@ public void testPost() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -137,7 +139,7 @@ public void testPost() throws ExecutionException, InterruptedException {
// final Future responseFuture = fetcher.fetch(request);
// final Response response = responseFuture.get();
// Assertions.assertEquals(200, response.getStatusCode());
-// Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+// Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
// Assertions.assertEquals("text/html", response.getContentType().getMimeType());
// Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
//
@@ -160,7 +162,7 @@ public void testPut() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -183,7 +185,7 @@ public void testDelete() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -206,7 +208,7 @@ public void testOptions() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -235,7 +237,7 @@ public void testParseImageContentType() throws ExecutionException, InterruptedEx
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("image/png", response.getContentType().getMimeType());
}
@@ -254,7 +256,7 @@ public void testParseHTMLContentType() throws ExecutionException, InterruptedExc
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
final VResponse vResponse = new VResponse(response);
@@ -283,7 +285,7 @@ public void testParseJSONCharset() throws ExecutionException, InterruptedExcepti
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/json", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -321,7 +323,7 @@ public void testUserAgent() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
final VResponse vResponse = new VResponse(response);
Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
@@ -346,7 +348,7 @@ public void testReplaceUserAgent() throws ExecutionException, InterruptedExcepti
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
final VResponse vResponse = new VResponse(response);
Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
@@ -372,7 +374,7 @@ public void testHeaders() throws ExecutionException, InterruptedException {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
final VResponse vResponse = new VResponse(response);
Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
@@ -400,7 +402,7 @@ public void testPresetHeaders() throws Exception {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
final VResponse vResponse = new VResponse(response);
Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
@@ -430,7 +432,7 @@ public void testPrependHeaders() throws Exception {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
final VResponse vResponse = new VResponse(response);
Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
@@ -456,7 +458,7 @@ public void testFileMangerIntegration() throws Exception {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -504,7 +506,7 @@ public void cancelled(@NotNull Request request) {
final Future responseFuture = fetcher.fetch(request);
final Response response = responseFuture.get();
Assertions.assertEquals(200, response.getStatusCode());
- Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl());
+ Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl());
Assertions.assertEquals("text/html", response.getContentType().getMimeType());
Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
@@ -563,5 +565,37 @@ public void testClosed() throws Exception {
Assertions.assertTrue(thrown.get(), "CancellationException not thrown.");
}
+ @Test
+ public void testRedirection() throws Exception {
+ final int port = wireMockServer.port();
+ configureFor("localhost", port);
+ final List paths = ImmutableList.of(
+ "/test-redirect-1",
+ "/test-redirect-2",
+ "/test-fetch"
+ );
+
+ for (int i = 0; i < paths.size() - 1; i++) {
+ stubFor(get(urlEqualTo(paths.get(i)))
+ .willReturn(temporaryRedirect(paths.get(i + 1))));
+ }
+
+ stubFor(get(urlEqualTo(paths.get(paths.size() - 1)))
+ .willReturn(aResponse()
+ .withStatus(200)
+ .withHeader("Content-Type", "text/html; charset=utf-8")
+ .withBody(content)));
+
+ final Request request = new VRequest("http://127.0.0.1:" + port + paths.get(0));
+ final Future responseFuture = fetcher.fetch(request);
+ final Response response = responseFuture.get();
+ Assertions.assertEquals(200, response.getStatusCode());
+ Assertions.assertEquals("http://127.0.0.1:" + port + paths.get(paths.size() - 1), response.getUrl());
+ Assertions.assertEquals("text/html", response.getContentType().getMimeType());
+ Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset());
+
+ final VResponse vResponse = new VResponse(response);
+ Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web."));
+ }
}
diff --git a/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java b/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java
index 42d1c0f..c6a8faf 100644
--- a/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java
+++ b/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java
@@ -63,13 +63,12 @@ public void start() {
@Override
public @NotNull Future fetch(@NotNull Request request, @NotNull Callback callback) {
final int statusCode = 200;
- final String baseUrl = request.getUrl();
final byte[] content = "IPSUM".getBytes();
final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8);
final Header[] headers = {};
final HttpHost proxy = request.getProxy();
- final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, proxy);
+ final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, proxy);
final Status status = statuses.poll();
counter.incrementAndGet();
diff --git a/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java b/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java
index 1c521a4..2c620f2 100644
--- a/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java
+++ b/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java
@@ -44,7 +44,7 @@ public class StorageFetcherTest {
@Test
public void testTrue() throws Exception {
final String path = "/test-fetch";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request request = new VRequest(url);
final int statusCode = 200;
@@ -78,7 +78,7 @@ public void testHeadersTrue() throws Exception {
final String path = "/test-headers";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request request = new VRequest(url);
final int statusCode = 200;
@@ -115,7 +115,7 @@ public void testFetcherHeadersTrue() throws Exception {
final String path = "/fetcher-headers";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request submittedRequest = new VRequest(url);
final int statusCode = 200;
@@ -155,7 +155,7 @@ public void testNotFound() throws Exception {
final String path = "/not-found";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request request = new VRequest(url);
final int statusCode = 200;
@@ -196,7 +196,7 @@ public void testNotFound() throws Exception {
@Test
public void testFailure() throws Exception {
final String path = "/test-failure";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request request = new VRequest(url);
final int statusCode = 200;
@@ -230,7 +230,7 @@ public void testFailure() throws Exception {
@Test
public void testValidation() throws Exception {
final String path = "/test-validation";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Request request = new VRequest(url);
final int statusCode = 500;
diff --git a/src/test/java/ai/preferred/venom/response/BaseResponseTest.java b/src/test/java/ai/preferred/venom/response/BaseResponseTest.java
index fb8c486..9735a8d 100644
--- a/src/test/java/ai/preferred/venom/response/BaseResponseTest.java
+++ b/src/test/java/ai/preferred/venom/response/BaseResponseTest.java
@@ -14,7 +14,7 @@ public void testBaseResponse() {
final String path = "/test-response";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final int statusCode = 200;
final String contentStr = "This is a test";
@@ -29,7 +29,7 @@ public void testBaseResponse() {
Assertions.assertEquals(content, baseResponse.getContent());
Assertions.assertEquals(contentType, baseResponse.getContentType());
Assertions.assertEquals(headers, baseResponse.getHeaders());
- Assertions.assertEquals(url, baseResponse.getBaseUrl());
+ Assertions.assertEquals(url, baseResponse.getUrl());
Assertions.assertEquals(proxy, baseResponse.getProxy());
}
diff --git a/src/test/java/ai/preferred/venom/response/StorageResponseTest.java b/src/test/java/ai/preferred/venom/response/StorageResponseTest.java
index 86b7ea1..83d22c6 100644
--- a/src/test/java/ai/preferred/venom/response/StorageResponseTest.java
+++ b/src/test/java/ai/preferred/venom/response/StorageResponseTest.java
@@ -19,7 +19,7 @@ public void testStorageResponse() {
final String path = "/test-response";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final Map headerMap = Collections.singletonMap(headerKey, headerValue);
final Request request = new VRequest(url, headerMap);
@@ -46,7 +46,7 @@ public void testStorageResponse() {
Assertions.assertEquals(content, storageResponse.getContent());
Assertions.assertEquals(contentType, storageResponse.getContentType());
Assertions.assertEquals(headers, storageResponse.getHeaders());
- Assertions.assertEquals(url, storageResponse.getBaseUrl());
+ Assertions.assertEquals(url, storageResponse.getUrl());
Assertions.assertNull(storageResponse.getProxy());
Assertions.assertEquals(storageRecord, storageResponse.getRecord());
}
diff --git a/src/test/java/ai/preferred/venom/response/VResponseTest.java b/src/test/java/ai/preferred/venom/response/VResponseTest.java
index 8fdc9b6..0d821a6 100644
--- a/src/test/java/ai/preferred/venom/response/VResponseTest.java
+++ b/src/test/java/ai/preferred/venom/response/VResponseTest.java
@@ -14,7 +14,7 @@ public void testVResponse() {
final String path = "/test-response";
final String headerKey = "Cookie";
final String headerValue = "text=json;";
- final String url = "http://127.0.0.1/" + path;
+ final String url = "http://127.0.0.1" + path;
final int statusCode = 200;
final String contentStr = "This is a test";
@@ -30,7 +30,7 @@ public void testVResponse() {
Assertions.assertEquals(content, vResponse.getContent());
Assertions.assertEquals(contentType, vResponse.getContentType());
Assertions.assertEquals(headers, vResponse.getHeaders());
- Assertions.assertEquals(url, vResponse.getBaseUrl());
+ Assertions.assertEquals(url, vResponse.getUrl());
Assertions.assertEquals(proxy, vResponse.getProxy());
Assertions.assertEquals(contentStr, vResponse.getHtml());
Assertions.assertNotNull(vResponse.getJsoup());
@@ -38,4 +38,30 @@ public void testVResponse() {
Assertions.assertEquals(baseResponse, vResponse.getInner());
}
+ @Test
+ public void testJsoupRelUrl() {
+ final String path = "/test-response/info#hashtag";
+ final String headerKey = "Cookie";
+ final String headerValue = "text=json;";
+ final String url = "http://127.0.0.1" + path;
+
+ final int statusCode = 200;
+ final String contentStr = "\n" +
+ "\n" +
+ "\n" +
+ "TEST\n" +
+ "\n" +
+ "\n";
+ final byte[] content = contentStr.getBytes();
+ final ContentType contentType = ContentType.TEXT_HTML;
+ final Header[] headers = {new BasicHeader(headerKey, headerValue)};
+
+ final BaseResponse baseResponse = new BaseResponse(statusCode, url, content, contentType, headers, null);
+ final VResponse vResponse = new VResponse(baseResponse);
+ final String hrefUrl = vResponse.getJsoup().getElementById("test").absUrl("href");
+
+ Assertions.assertEquals("http://127.0.0.1/test-response/test-rel", hrefUrl);
+
+ }
+
}
diff --git a/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java b/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java
index 7dd3a8d..5a49b63 100644
--- a/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java
+++ b/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java
@@ -36,13 +36,12 @@ void testCompleted() throws StorageException {
final Request request = new VRequest(url);
final int statusCode = 200;
- final String baseUrl = request.getUrl();
final byte[] content = "IPSUM".getBytes();
final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8);
final Header[] headers = {};
final HttpHost proxy = request.getProxy();
- final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, proxy);
+ final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, proxy);
final FileManager