From 2f6a1f7662259a14b05ad7b9460730e21f21ff53 Mon Sep 17 00:00:00 2001 From: Ween Jiann Date: Mon, 21 Sep 2020 20:27:19 +0800 Subject: [PATCH] Fix wrong url returned for redirects (#14) * Use redirected url * Added redirection test * Clean up * Removed todo * Clean up * Clean up * Add new test * bump version * Clean up * Clean up * Clean up --- pom.xml | 2 +- .../venom/fetcher/AsyncResponseConsumer.java | 29 ++++---- .../venom/fetcher/StorageFetcher.java | 13 +--- .../venom/response/BaseResponse.java | 23 ++++--- .../ai/preferred/venom/response/Response.java | 10 +++ .../venom/response/StorageResponse.java | 27 +++++--- .../preferred/venom/response/VResponse.java | 18 +++-- .../ai/preferred/venom/utils/UrlUtil.java | 1 + .../venom/fetcher/AsyncFetcherTest.java | 66 ++++++++++++++----- .../preferred/venom/fetcher/FakeFetcher.java | 3 +- .../venom/fetcher/StorageFetcherTest.java | 12 ++-- .../venom/response/BaseResponseTest.java | 4 +- .../venom/response/StorageResponseTest.java | 4 +- .../venom/response/VResponseTest.java | 30 ++++++++- .../storage/FileManagerCallbackTest.java | 3 +- .../venom/storage/StorageRecordTest.java | 2 +- .../validator/EmptyContentValidatorTest.java | 5 +- .../validator/MimeTypeValidatorTest.java | 9 ++- .../validator/PipelineValidatorTest.java | 19 +++--- .../validator/StatusOkValidatorTest.java | 5 +- 20 files changed, 180 insertions(+), 105 deletions(-) diff --git a/pom.xml b/pom.xml index fa5cebd..6f119c4 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ ai.preferred venom - 4.2.7-SNAPSHOT + 4.2.7 jar ${project.groupId}:${project.artifactId} diff --git a/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java b/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java index 7d73456..baaf3ed 100644 --- a/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java +++ b/src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java @@ -21,12 +21,12 @@ import ai.preferred.venom.response.BaseResponse; 
import ai.preferred.venom.response.Response; import ai.preferred.venom.utils.ResponseDecompressor; -import ai.preferred.venom.utils.UrlUtil; import ai.preferred.venom.validator.Validator; import com.ibm.icu.text.CharsetDetector; import com.ibm.icu.text.CharsetMatch; import org.apache.commons.io.IOUtils; import org.apache.http.*; +import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.entity.ContentType; import org.apache.http.nio.ContentDecoder; import org.apache.http.nio.IOControl; @@ -43,8 +43,9 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URISyntaxException; +import java.net.URI; import java.nio.charset.UnsupportedCharsetException; +import java.util.List; import java.util.Set; /** @@ -146,29 +147,29 @@ private byte[] getContent(final HttpEntity entity) throws IOException { * @return An instance of base response * @throws IOException Reading http response */ - private BaseResponse createVenomResponse(final boolean compressed) throws IOException { + private BaseResponse createVenomResponse(final boolean compressed, final HttpContext context) throws IOException { if (compressed) { RESPONSE_DECOMPRESSOR.decompress(httpResponse); } + final HttpClientContext clientContext = HttpClientContext.adapt(context); + final List redirectedLocations = clientContext.getRedirectLocations(); + final String url; + if (redirectedLocations == null) { + url = request.getUrl(); + } else { + url = redirectedLocations.get(redirectedLocations.size() - 1).toString(); + } + final HttpEntity entity = httpResponse.getEntity(); final byte[] content = getContent(entity); request.getDiagnostics().setSize(content.length); final ContentType contentType = getContentType(entity); final Header[] headers = httpResponse.getAllHeaders(); - String tryBaseUrl; - try { - tryBaseUrl = UrlUtil.getBaseUrl(request); - } catch (URISyntaxException e) { - LOGGER.warn("Could not parse base URL: " + request.getUrl()); - tryBaseUrl = request.getUrl(); - } 
- final String baseUrl = tryBaseUrl; - return new BaseResponse( httpResponse.getStatusLine().getStatusCode(), - baseUrl, + url, content, contentType, headers, @@ -253,7 +254,7 @@ protected final BaseResponse buildResult(final HttpContext context) throws Excep throw new StopCodeException(statusCode, "Stop code received."); } - final BaseResponse response = createVenomResponse(compressed); + final BaseResponse response = createVenomResponse(compressed, context); releaseResources(); final Validator.Status status; diff --git a/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java b/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java index 9c9bc55..77b437c 100644 --- a/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java +++ b/src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java @@ -24,7 +24,6 @@ import ai.preferred.venom.storage.FileManager; import ai.preferred.venom.storage.Record; import ai.preferred.venom.storage.StorageException; -import ai.preferred.venom.utils.UrlUtil; import ai.preferred.venom.validator.EmptyContentValidator; import ai.preferred.venom.validator.PipelineValidator; import ai.preferred.venom.validator.StatusOkValidator; @@ -35,7 +34,6 @@ import org.slf4j.LoggerFactory; import javax.validation.constraints.NotNull; -import java.net.URISyntaxException; import java.util.Collections; import java.util.Map; import java.util.concurrent.Future; @@ -145,16 +143,7 @@ public void cancelled() { LOGGER.debug("Record found with id: {}", record.getId()); - String tryBaseUrl; - try { - tryBaseUrl = UrlUtil.getBaseUrl(request); - } catch (URISyntaxException e) { - LOGGER.warn("Could not parse base URL: " + request.getUrl()); - tryBaseUrl = request.getUrl(); - } - final String baseUrl = tryBaseUrl; - - final StorageResponse response = new StorageResponse(record, baseUrl); + final StorageResponse response = new StorageResponse(record, request.getUrl()); final Validator.Status status = 
validator.isValid(Unwrappable.unwrapRequest(request), response); if (status != Validator.Status.VALID) { future.failed(new ValidationException(status, response, "Invalid response.")); diff --git a/src/main/java/ai/preferred/venom/response/BaseResponse.java b/src/main/java/ai/preferred/venom/response/BaseResponse.java index a04e2f1..95cae8c 100644 --- a/src/main/java/ai/preferred/venom/response/BaseResponse.java +++ b/src/main/java/ai/preferred/venom/response/BaseResponse.java @@ -20,6 +20,8 @@ import org.apache.http.HttpHost; import org.apache.http.entity.ContentType; +import javax.validation.constraints.NotNull; + /** * @author Maksim Tkachenko * @author Truong Quoc Tuan @@ -50,7 +52,7 @@ public class BaseResponse implements Response { /** * The base url of this response. */ - private final String baseUrl; + private final String url; /** * The proxy used to obtain response. @@ -61,16 +63,16 @@ public class BaseResponse implements Response { * Constructs a base response. * * @param statusCode Status code of the response - * @param baseUrl Base url of the response + * @param url Url of the response * @param content Content from the response * @param contentType Content type of the response * @param headers Headers from the response * @param proxy Proxy used to obtain the response */ - public BaseResponse(final int statusCode, final String baseUrl, final byte[] content, final ContentType contentType, + public BaseResponse(final int statusCode, final String url, final byte[] content, final ContentType contentType, final Header[] headers, final HttpHost proxy) { this.statusCode = statusCode; - this.baseUrl = baseUrl; + this.url = url; this.content = content; this.contentType = contentType; this.headers = headers; @@ -88,18 +90,23 @@ public final byte[] getContent() { } @Override - public final ContentType getContentType() { + public final @NotNull ContentType getContentType() { return contentType; } @Override - public final Header[] getHeaders() { + public final
@NotNull Header[] getHeaders() { return headers; } @Override - public final String getBaseUrl() { - return baseUrl; + public final @NotNull String getUrl() { + return url; + } + + @Override + public final @NotNull String getBaseUrl() { + return getUrl(); } @Override diff --git a/src/main/java/ai/preferred/venom/response/Response.java b/src/main/java/ai/preferred/venom/response/Response.java index 09e0020..36f64f4 100644 --- a/src/main/java/ai/preferred/venom/response/Response.java +++ b/src/main/java/ai/preferred/venom/response/Response.java @@ -64,11 +64,21 @@ public interface Response { @NotNull Header[] getHeaders(); + /** + * Returns the url used to fetch the response. If the request + * is redirected, this will be the final requested url. + * + * @return the url used to fetch the response + */ + @NotNull + String getUrl(); + /** * Returns the base form of the url used in this request. * * @return stripped down version of requested url */ + @Deprecated @NotNull String getBaseUrl(); diff --git a/src/main/java/ai/preferred/venom/response/StorageResponse.java b/src/main/java/ai/preferred/venom/response/StorageResponse.java index eb8a16d..6cf92b2 100644 --- a/src/main/java/ai/preferred/venom/response/StorageResponse.java +++ b/src/main/java/ai/preferred/venom/response/StorageResponse.java @@ -21,6 +21,8 @@ import org.apache.http.HttpHost; import org.apache.http.entity.ContentType; +import javax.validation.constraints.NotNull; + /** * @author Ween Jiann Lee @@ -33,19 +35,19 @@ public class StorageResponse implements Response, Retrievable { private final Record record; /** - * The base url of this response. + * The url of this response. */ - private final String baseUrl; + private final String url; /** * Constructs a base response.
* - * @param record record holding this response - * @param baseUrl base URL of the response + * @param record record holding this response + * @param url URL of the response */ - public StorageResponse(final Record record, final String baseUrl) { + public StorageResponse(final Record record, final String url) { this.record = record; - this.baseUrl = baseUrl; + this.url = url; } @Override @@ -59,18 +61,23 @@ public final byte[] getContent() { } @Override - public final ContentType getContentType() { + public final @NotNull ContentType getContentType() { return record.getContentType(); } @Override - public final Header[] getHeaders() { + public final @NotNull Header[] getHeaders() { return record.getResponseHeaders(); } @Override - public final String getBaseUrl() { - return baseUrl; + public final @NotNull String getUrl() { + return url; + } + + @Override + public final @NotNull String getBaseUrl() { + return getUrl(); } @Override diff --git a/src/main/java/ai/preferred/venom/response/VResponse.java b/src/main/java/ai/preferred/venom/response/VResponse.java index 8b013d3..38d1ca2 100644 --- a/src/main/java/ai/preferred/venom/response/VResponse.java +++ b/src/main/java/ai/preferred/venom/response/VResponse.java @@ -22,6 +22,7 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; +import javax.validation.constraints.NotNull; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -62,18 +63,23 @@ public final byte[] getContent() { } @Override - public final ContentType getContentType() { + public final @NotNull ContentType getContentType() { return getInner().getContentType(); } @Override - public final Header[] getHeaders() { + public final @NotNull Header[] getHeaders() { return getInner().getHeaders(); } @Override - public final String getBaseUrl() { - return getInner().getBaseUrl(); + public final @NotNull String getUrl() { + return getInner().getUrl(); + } + + @Override + public final @NotNull String getBaseUrl() { + return
getInner().getUrl(); } @Override @@ -110,7 +116,7 @@ public final String getHtml(final Charset charset) { * @return jsoup document of response */ public final Document getJsoup() { - return Jsoup.parse(getHtml(), getBaseUrl()); + return Jsoup.parse(getHtml(), getUrl()); } /** @@ -120,7 +126,7 @@ public final Document getJsoup() { * @return jsoup document of response */ public final Document getJsoup(final Charset charset) { - return Jsoup.parse(getHtml(charset), getBaseUrl()); + return Jsoup.parse(getHtml(charset), getUrl()); } @Override diff --git a/src/main/java/ai/preferred/venom/utils/UrlUtil.java b/src/main/java/ai/preferred/venom/utils/UrlUtil.java index 7b8737f..2f61b15 100644 --- a/src/main/java/ai/preferred/venom/utils/UrlUtil.java +++ b/src/main/java/ai/preferred/venom/utils/UrlUtil.java @@ -42,6 +42,7 @@ private UrlUtil() { * @return base URL string * @throws URISyntaxException if not a proper URL */ + @Deprecated public static String getBaseUrl(final Request request) throws URISyntaxException { final URI uri = new URI(request.getUrl()); final URI baseUri = new URI(uri.getScheme(), null, uri.getHost(), uri.getPort(), uri.getPath(), null, null); diff --git a/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java b/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java index eac28a3..3c45eb8 100644 --- a/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java +++ b/src/test/java/ai/preferred/venom/fetcher/AsyncFetcherTest.java @@ -24,6 +24,7 @@ import ai.preferred.venom.storage.FileManager; import ai.preferred.venom.storage.Record; import com.github.tomakehurst.wiremock.WireMockServer; +import com.google.common.collect.ImmutableList; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -36,6 +37,7 @@ import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.List; import java.util.Map; import 
java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; @@ -91,7 +93,7 @@ public void testGet() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -114,7 +116,7 @@ public void testPost() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -137,7 +139,7 @@ public void testPost() throws ExecutionException, InterruptedException { // final Future responseFuture = fetcher.fetch(request); // final Response response = responseFuture.get(); // Assertions.assertEquals(200, response.getStatusCode()); -// Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); +// Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); // Assertions.assertEquals("text/html", response.getContentType().getMimeType()); // Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); // @@ -160,7 +162,7 @@ public void testPut() throws ExecutionException, InterruptedException { final Future responseFuture = 
fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -183,7 +185,7 @@ public void testDelete() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -206,7 +208,7 @@ public void testOptions() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -235,7 +237,7 @@ public void testParseImageContentType() throws ExecutionException, InterruptedEx final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + 
Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("image/png", response.getContentType().getMimeType()); } @@ -254,7 +256,7 @@ public void testParseHTMLContentType() throws ExecutionException, InterruptedExc final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); final VResponse vResponse = new VResponse(response); @@ -283,7 +285,7 @@ public void testParseJSONCharset() throws ExecutionException, InterruptedExcepti final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/json", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -321,7 +323,7 @@ public void testUserAgent() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); final VResponse vResponse = new VResponse(response); Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); @@ -346,7 +348,7 @@ public void testReplaceUserAgent() throws ExecutionException, InterruptedExcepti 
final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); final VResponse vResponse = new VResponse(response); Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); @@ -372,7 +374,7 @@ public void testHeaders() throws ExecutionException, InterruptedException { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); final VResponse vResponse = new VResponse(response); Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); @@ -400,7 +402,7 @@ public void testPresetHeaders() throws Exception { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); final VResponse vResponse = new VResponse(response); Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); @@ -430,7 +432,7 @@ public void testPrependHeaders() throws Exception { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + 
Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); final VResponse vResponse = new VResponse(response); Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); @@ -456,7 +458,7 @@ public void testFileMangerIntegration() throws Exception { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -504,7 +506,7 @@ public void cancelled(@NotNull Request request) { final Future responseFuture = fetcher.fetch(request); final Response response = responseFuture.get(); Assertions.assertEquals(200, response.getStatusCode()); - Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getBaseUrl()); + Assertions.assertEquals("http://127.0.0.1:" + port + path, response.getUrl()); Assertions.assertEquals("text/html", response.getContentType().getMimeType()); Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); @@ -563,5 +565,37 @@ public void testClosed() throws Exception { Assertions.assertTrue(thrown.get(), "CancellationException not thrown."); } + @Test + public void testRedirection() throws Exception { + final int port = wireMockServer.port(); + configureFor("localhost", port); + final List paths = ImmutableList.of( + "/test-redirect-1", + "/test-redirect-2", + "/test-fetch" + ); + + for (int i = 0; i < paths.size() - 1; i++) { + stubFor(get(urlEqualTo(paths.get(i))) + .willReturn(temporaryRedirect(paths.get(i + 1)))); + } + + stubFor(get(urlEqualTo(paths.get(paths.size() - 1))) + .willReturn(aResponse() + 
.withStatus(200) + .withHeader("Content-Type", "text/html; charset=utf-8") + .withBody(content))); + + final Request request = new VRequest("http://127.0.0.1:" + port + paths.get(0)); + final Future responseFuture = fetcher.fetch(request); + final Response response = responseFuture.get(); + Assertions.assertEquals(200, response.getStatusCode()); + Assertions.assertEquals("http://127.0.0.1:" + port + paths.get(paths.size() - 1), response.getUrl()); + Assertions.assertEquals("text/html", response.getContentType().getMimeType()); + Assertions.assertEquals(StandardCharsets.UTF_8, response.getContentType().getCharset()); + + final VResponse vResponse = new VResponse(response); + Assertions.assertTrue(vResponse.getHtml().contains("Venom is an open source focused crawler for the deep web.")); + } } diff --git a/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java b/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java index 42d1c0f..c6a8faf 100644 --- a/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java +++ b/src/test/java/ai/preferred/venom/fetcher/FakeFetcher.java @@ -63,13 +63,12 @@ public void start() { @Override public @NotNull Future fetch(@NotNull Request request, @NotNull Callback callback) { final int statusCode = 200; - final String baseUrl = request.getUrl(); final byte[] content = "IPSUM".getBytes(); final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); final Header[] headers = {}; final HttpHost proxy = request.getProxy(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, proxy); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, proxy); final Status status = statuses.poll(); counter.incrementAndGet(); diff --git a/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java b/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java index 1c521a4..2c620f2 100644 --- 
a/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java +++ b/src/test/java/ai/preferred/venom/fetcher/StorageFetcherTest.java @@ -44,7 +44,7 @@ public class StorageFetcherTest { @Test public void testTrue() throws Exception { final String path = "/test-fetch"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request request = new VRequest(url); final int statusCode = 200; @@ -78,7 +78,7 @@ public void testHeadersTrue() throws Exception { final String path = "/test-headers"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request request = new VRequest(url); final int statusCode = 200; @@ -115,7 +115,7 @@ public void testFetcherHeadersTrue() throws Exception { final String path = "/fetcher-headers"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request submittedRequest = new VRequest(url); final int statusCode = 200; @@ -155,7 +155,7 @@ public void testNotFound() throws Exception { final String path = "/not-found"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request request = new VRequest(url); final int statusCode = 200; @@ -196,7 +196,7 @@ public void testNotFound() throws Exception { @Test public void testFailure() throws Exception { final String path = "/test-failure"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request request = new VRequest(url); final int statusCode = 200; @@ -230,7 +230,7 @@ public void testFailure() throws Exception { @Test public void testValidation() throws Exception { final String path = "/test-validation"; - final String 
url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Request request = new VRequest(url); final int statusCode = 500; diff --git a/src/test/java/ai/preferred/venom/response/BaseResponseTest.java b/src/test/java/ai/preferred/venom/response/BaseResponseTest.java index fb8c486..9735a8d 100644 --- a/src/test/java/ai/preferred/venom/response/BaseResponseTest.java +++ b/src/test/java/ai/preferred/venom/response/BaseResponseTest.java @@ -14,7 +14,7 @@ public void testBaseResponse() { final String path = "/test-response"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final int statusCode = 200; final String contentStr = "This is a test"; @@ -29,7 +29,7 @@ public void testBaseResponse() { Assertions.assertEquals(content, baseResponse.getContent()); Assertions.assertEquals(contentType, baseResponse.getContentType()); Assertions.assertEquals(headers, baseResponse.getHeaders()); - Assertions.assertEquals(url, baseResponse.getBaseUrl()); + Assertions.assertEquals(url, baseResponse.getUrl()); Assertions.assertEquals(proxy, baseResponse.getProxy()); } diff --git a/src/test/java/ai/preferred/venom/response/StorageResponseTest.java b/src/test/java/ai/preferred/venom/response/StorageResponseTest.java index 86b7ea1..83d22c6 100644 --- a/src/test/java/ai/preferred/venom/response/StorageResponseTest.java +++ b/src/test/java/ai/preferred/venom/response/StorageResponseTest.java @@ -19,7 +19,7 @@ public void testStorageResponse() { final String path = "/test-response"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Map headerMap = Collections.singletonMap(headerKey, headerValue); final Request request = new VRequest(url, headerMap); @@ -46,7 +46,7 @@ public void testStorageResponse() { 
Assertions.assertEquals(content, storageResponse.getContent()); Assertions.assertEquals(contentType, storageResponse.getContentType()); Assertions.assertEquals(headers, storageResponse.getHeaders()); - Assertions.assertEquals(url, storageResponse.getBaseUrl()); + Assertions.assertEquals(url, storageResponse.getUrl()); Assertions.assertNull(storageResponse.getProxy()); Assertions.assertEquals(storageRecord, storageResponse.getRecord()); } diff --git a/src/test/java/ai/preferred/venom/response/VResponseTest.java b/src/test/java/ai/preferred/venom/response/VResponseTest.java index 8fdc9b6..0d821a6 100644 --- a/src/test/java/ai/preferred/venom/response/VResponseTest.java +++ b/src/test/java/ai/preferred/venom/response/VResponseTest.java @@ -14,7 +14,7 @@ public void testVResponse() { final String path = "/test-response"; final String headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final int statusCode = 200; final String contentStr = "This is a test"; @@ -30,7 +30,7 @@ public void testVResponse() { Assertions.assertEquals(content, vResponse.getContent()); Assertions.assertEquals(contentType, vResponse.getContentType()); Assertions.assertEquals(headers, vResponse.getHeaders()); - Assertions.assertEquals(url, vResponse.getBaseUrl()); + Assertions.assertEquals(url, vResponse.getUrl()); Assertions.assertEquals(proxy, vResponse.getProxy()); Assertions.assertEquals(contentStr, vResponse.getHtml()); Assertions.assertNotNull(vResponse.getJsoup()); @@ -38,4 +38,30 @@ public void testVResponse() { Assertions.assertEquals(baseResponse, vResponse.getInner()); } + @Test + public void testJsoupRelUrl() { + final String path = "/test-response/info#hashtag"; + final String headerKey = "Cookie"; + final String headerValue = "text=json;"; + final String url = "http://127.0.0.1" + path; + + final int statusCode = 200; + final String contentStr = "\n" + + "\n" + + "\n" + + 
"TEST\n" + + "\n" + + "\n"; + final byte[] content = contentStr.getBytes(); + final ContentType contentType = ContentType.TEXT_HTML; + final Header[] headers = {new BasicHeader(headerKey, headerValue)}; + + final BaseResponse baseResponse = new BaseResponse(statusCode, url, content, contentType, headers, null); + final VResponse vResponse = new VResponse(baseResponse); + final String hrefUrl = vResponse.getJsoup().getElementById("test").absUrl("href"); + + Assertions.assertEquals("http://127.0.0.1/test-response/test-rel", hrefUrl); + + } + } diff --git a/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java b/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java index 7dd3a8d..5a49b63 100644 --- a/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java +++ b/src/test/java/ai/preferred/venom/storage/FileManagerCallbackTest.java @@ -36,13 +36,12 @@ void testCompleted() throws StorageException { final Request request = new VRequest(url); final int statusCode = 200; - final String baseUrl = request.getUrl(); final byte[] content = "IPSUM".getBytes(); final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); final Header[] headers = {}; final HttpHost proxy = request.getProxy(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, proxy); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, proxy); final FileManager fileManager = new FakeFileManager(); fileManager.getCallback().completed(request, response); diff --git a/src/test/java/ai/preferred/venom/storage/StorageRecordTest.java b/src/test/java/ai/preferred/venom/storage/StorageRecordTest.java index df4ed9f..25605a5 100644 --- a/src/test/java/ai/preferred/venom/storage/StorageRecordTest.java +++ b/src/test/java/ai/preferred/venom/storage/StorageRecordTest.java @@ -34,7 +34,7 @@ void test() { final String path = "/test-headers"; final String 
headerKey = "Cookie"; final String headerValue = "text=json;"; - final String url = "http://127.0.0.1/" + path; + final String url = "http://127.0.0.1" + path; final Map headerMap = Collections.singletonMap(headerKey, headerValue); final Request request = new VRequest(url, headerMap); diff --git a/src/test/java/ai/preferred/venom/validator/EmptyContentValidatorTest.java b/src/test/java/ai/preferred/venom/validator/EmptyContentValidatorTest.java index d1ade7f..f373908 100644 --- a/src/test/java/ai/preferred/venom/validator/EmptyContentValidatorTest.java +++ b/src/test/java/ai/preferred/venom/validator/EmptyContentValidatorTest.java @@ -31,21 +31,20 @@ public class EmptyContentValidatorTest { private final Request request = new VRequest("https://venom.preferred.ai"); private final int statusCode = 200; - private final String baseUrl = "https://venom.preferred.ai"; private final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); private final Header[] headers = {}; @Test public void testEmptyContent() { final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.INVALID_CONTENT, EmptyContentValidator.INSTANCE.isValid(request, response)); } @Test public void testValidContent() { final byte[] content = "IPSUM".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.VALID, EmptyContentValidator.INSTANCE.isValid(request, response)); } diff --git a/src/test/java/ai/preferred/venom/validator/MimeTypeValidatorTest.java b/src/test/java/ai/preferred/venom/validator/MimeTypeValidatorTest.java index 
a2a5af0..9faed68 100644 --- a/src/test/java/ai/preferred/venom/validator/MimeTypeValidatorTest.java +++ b/src/test/java/ai/preferred/venom/validator/MimeTypeValidatorTest.java @@ -32,14 +32,13 @@ public class MimeTypeValidatorTest { private final Request request = new VRequest("https://venom.preferred.ai"); private final int statusCode = 200; - private final String baseUrl = "https://venom.preferred.ai"; private final byte[] content = "IPSUM".getBytes(); private final Header[] headers = {}; @Test public void testValidMimeTypePattern() { final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.VALID, new MimeTypeValidator(Pattern.compile("^text.*")).isValid(request, response)); } @@ -47,7 +46,7 @@ public void testValidMimeTypePattern() { @Test public void testInvalidMimeTypePattern() { final ContentType contentType = ContentType.create("text/json", StandardCharsets.UTF_8); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.INVALID_CONTENT, new MimeTypeValidator(Pattern.compile("^image.*")).isValid(request, response)); } @@ -55,7 +54,7 @@ public void testInvalidMimeTypePattern() { @Test public void testValidMimeTypeString() { final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.VALID, new 
MimeTypeValidator("^text.*").isValid(request, response)); } @@ -63,7 +62,7 @@ public void testValidMimeTypeString() { @Test public void testInvalidMimeTypeString() { final ContentType contentType = ContentType.create("text/json", StandardCharsets.UTF_8); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.INVALID_CONTENT, new MimeTypeValidator("^image.*").isValid(request, response)); } diff --git a/src/test/java/ai/preferred/venom/validator/PipelineValidatorTest.java b/src/test/java/ai/preferred/venom/validator/PipelineValidatorTest.java index 3ceeb1c..23eddb1 100644 --- a/src/test/java/ai/preferred/venom/validator/PipelineValidatorTest.java +++ b/src/test/java/ai/preferred/venom/validator/PipelineValidatorTest.java @@ -32,7 +32,6 @@ public class PipelineValidatorTest { private final Request request = new VRequest("https://venom.preferred.ai"); - private final String baseUrl = "https://venom.preferred.ai"; private final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); private final Header[] headers = {}; @@ -41,7 +40,7 @@ public class PipelineValidatorTest { public void testValidPipeline() { final int statusCode = 200; final byte[] content = "IPSUM".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final Validator.Status status = new PipelineValidator(StatusOkValidator.INSTANCE, EmptyContentValidator.INSTANCE) .isValid(request, response); @@ -52,7 +51,7 @@ public void testValidPipeline() { public void testValidPipelineList() { final int statusCode = 200; final byte[] content = "IPSUM".getBytes(); - final Response response = new BaseResponse(statusCode, 
baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final List validators = new LinkedList<>(); validators.add(StatusOkValidator.INSTANCE); validators.add(EmptyContentValidator.INSTANCE); @@ -66,7 +65,7 @@ public void testValidPipelineList() { public void testFirstInvalidPipeline() { final int statusCode = 400; final byte[] content = "IPSUM".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final Validator.Status status = new PipelineValidator(StatusOkValidator.INSTANCE, EmptyContentValidator.INSTANCE) .isValid(request, response); @@ -77,7 +76,7 @@ public void testFirstInvalidPipeline() { public void testFirstInvalidPipelineList() { final int statusCode = 400; final byte[] content = "IPSUM".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final List validators = new LinkedList<>(); validators.add(StatusOkValidator.INSTANCE); validators.add(EmptyContentValidator.INSTANCE); @@ -91,7 +90,7 @@ public void testFirstInvalidPipelineList() { public void testSecondInvalidPipeline() { final int statusCode = 200; final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final Validator.Status status = new PipelineValidator(StatusOkValidator.INSTANCE, EmptyContentValidator.INSTANCE) .isValid(request, response); @@ -102,7 +101,7 @@ public void testSecondInvalidPipeline() { public void testSecondInvalidPipelineList() { final 
int statusCode = 200; final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final List validators = new LinkedList<>(); validators.add(StatusOkValidator.INSTANCE); validators.add(EmptyContentValidator.INSTANCE); @@ -116,7 +115,7 @@ public void testSecondInvalidPipelineList() { public void testMultiInvalidPipeline() { final int statusCode = 400; final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final Validator.Status status = new PipelineValidator(StatusOkValidator.INSTANCE, EmptyContentValidator.INSTANCE) .isValid(request, response); @@ -127,7 +126,7 @@ public void testMultiInvalidPipeline() { public void testMultiInvalidPipelineList() { final int statusCode = 400; final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final List validators = new LinkedList<>(); validators.add(StatusOkValidator.INSTANCE); validators.add(EmptyContentValidator.INSTANCE); @@ -141,7 +140,7 @@ public void testMultiInvalidPipelineList() { public void testNullInPipeline() { final int statusCode = 400; final byte[] content = "".getBytes(); - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); final Validator.Status status = new PipelineValidator(StatusOkValidator.INSTANCE, null) .isValid(request, response); diff --git 
a/src/test/java/ai/preferred/venom/validator/StatusOkValidatorTest.java b/src/test/java/ai/preferred/venom/validator/StatusOkValidatorTest.java index 78330f1..d30aa6a 100644 --- a/src/test/java/ai/preferred/venom/validator/StatusOkValidatorTest.java +++ b/src/test/java/ai/preferred/venom/validator/StatusOkValidatorTest.java @@ -31,12 +31,11 @@ public class StatusOkValidatorTest { private final Request request = new VRequest("https://venom.preferred.ai"); private final byte[] content = "".getBytes(); - private final String baseUrl = "https://venom.preferred.ai"; private final ContentType contentType = ContentType.create("text/html", StandardCharsets.UTF_8); private final Header[] headers = {}; private void assertInvalid(int statusCode) { - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.INVALID_STATUS_CODE, StatusOkValidator.INSTANCE.isValid(request, response)); } @@ -49,7 +48,7 @@ public void testInvalidStatusCode() { @Test public void testValidStatusCode() { final int statusCode = 200; - final Response response = new BaseResponse(statusCode, baseUrl, content, contentType, headers, null); + final Response response = new BaseResponse(statusCode, request.getUrl(), content, contentType, headers, null); Assertions.assertEquals(Validator.Status.VALID, StatusOkValidator.INSTANCE.isValid(request, response)); }