Skip to content

Commit

Permalink
Fix wrong url returned for redirects (#14)
Browse files Browse the repository at this point in the history
* Use redirected url

* Added redirection test

* Clean up

* Removed todo

* Clean up

* Clean up

* Add new test

* bump version

* Clean up

* Clean up

* Clean up
  • Loading branch information
lwj5 authored Sep 21, 2020
1 parent 6cb6ade commit 2f6a1f7
Show file tree
Hide file tree
Showing 20 changed files with 180 additions and 105 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<groupId>ai.preferred</groupId>
<artifactId>venom</artifactId>
<version>4.2.7-SNAPSHOT</version>
<version>4.2.7</version>
<packaging>jar</packaging>

<name>${project.groupId}:${project.artifactId}</name>
Expand Down
29 changes: 15 additions & 14 deletions src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
import ai.preferred.venom.response.BaseResponse;
import ai.preferred.venom.response.Response;
import ai.preferred.venom.utils.ResponseDecompressor;
import ai.preferred.venom.utils.UrlUtil;
import ai.preferred.venom.validator.Validator;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.apache.commons.io.IOUtils;
import org.apache.http.*;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.ContentDecoder;
import org.apache.http.nio.IOControl;
Expand All @@ -43,8 +43,9 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URI;
import java.nio.charset.UnsupportedCharsetException;
import java.util.List;
import java.util.Set;

/**
Expand Down Expand Up @@ -146,29 +147,29 @@ private byte[] getContent(final HttpEntity entity) throws IOException {
* @return An instance of base response
* @throws IOException Reading http response
*/
private BaseResponse createVenomResponse(final boolean compressed) throws IOException {
private BaseResponse createVenomResponse(final boolean compressed, final HttpContext context) throws IOException {
if (compressed) {
RESPONSE_DECOMPRESSOR.decompress(httpResponse);
}

final HttpClientContext clientContext = HttpClientContext.adapt(context);
final List<URI> redirectedLocations = clientContext.getRedirectLocations();
final String url;
if (redirectedLocations == null) {
url = request.getUrl();
} else {
url = redirectedLocations.get(redirectedLocations.size() - 1).toString();
}

final HttpEntity entity = httpResponse.getEntity();
final byte[] content = getContent(entity);
request.getDiagnostics().setSize(content.length);
final ContentType contentType = getContentType(entity);
final Header[] headers = httpResponse.getAllHeaders();

String tryBaseUrl;
try {
tryBaseUrl = UrlUtil.getBaseUrl(request);
} catch (URISyntaxException e) {
LOGGER.warn("Could not parse base URL: " + request.getUrl());
tryBaseUrl = request.getUrl();
}
final String baseUrl = tryBaseUrl;

return new BaseResponse(
httpResponse.getStatusLine().getStatusCode(),
baseUrl,
url,
content,
contentType,
headers,
Expand Down Expand Up @@ -253,7 +254,7 @@ protected final BaseResponse buildResult(final HttpContext context) throws Excep
throw new StopCodeException(statusCode, "Stop code received.");
}

final BaseResponse response = createVenomResponse(compressed);
final BaseResponse response = createVenomResponse(compressed, context);
releaseResources();

final Validator.Status status;
Expand Down
13 changes: 1 addition & 12 deletions src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import ai.preferred.venom.storage.FileManager;
import ai.preferred.venom.storage.Record;
import ai.preferred.venom.storage.StorageException;
import ai.preferred.venom.utils.UrlUtil;
import ai.preferred.venom.validator.EmptyContentValidator;
import ai.preferred.venom.validator.PipelineValidator;
import ai.preferred.venom.validator.StatusOkValidator;
Expand All @@ -35,7 +34,6 @@
import org.slf4j.LoggerFactory;

import javax.validation.constraints.NotNull;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.Future;
Expand Down Expand Up @@ -145,16 +143,7 @@ public void cancelled() {

LOGGER.debug("Record found with id: {}", record.getId());

String tryBaseUrl;
try {
tryBaseUrl = UrlUtil.getBaseUrl(request);
} catch (URISyntaxException e) {
LOGGER.warn("Could not parse base URL: " + request.getUrl());
tryBaseUrl = request.getUrl();
}
final String baseUrl = tryBaseUrl;

final StorageResponse response = new StorageResponse(record, baseUrl);
final StorageResponse response = new StorageResponse(record, request.getUrl());
final Validator.Status status = validator.isValid(Unwrappable.unwrapRequest(request), response);
if (status != Validator.Status.VALID) {
future.failed(new ValidationException(status, response, "Invalid response."));
Expand Down
23 changes: 15 additions & 8 deletions src/main/java/ai/preferred/venom/response/BaseResponse.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import org.apache.http.HttpHost;
import org.apache.http.entity.ContentType;

import javax.validation.constraints.NotNull;

/**
* @author Maksim Tkachenko
* @author Truong Quoc Tuan
Expand Down Expand Up @@ -50,7 +52,7 @@ public class BaseResponse implements Response {
/**
* The base url of this response.
*/
private final String baseUrl;
private final String url;

/**
* The proxy used to obtain response.
Expand All @@ -61,16 +63,16 @@ public class BaseResponse implements Response {
* Constructs a base response.
*
* @param statusCode Status code of the response
* @param baseUrl Base url of the response
* @param url Base url of the response
* @param content Content from the response
* @param contentType Content type of the response
* @param headers Headers from the response
* @param proxy Proxy used to obtain the response
*/
public BaseResponse(final int statusCode, final String baseUrl, final byte[] content, final ContentType contentType,
public BaseResponse(final int statusCode, final String url, final byte[] content, final ContentType contentType,
final Header[] headers, final HttpHost proxy) {
this.statusCode = statusCode;
this.baseUrl = baseUrl;
this.url = url;
this.content = content;
this.contentType = contentType;
this.headers = headers;
Expand All @@ -88,18 +90,23 @@ public final byte[] getContent() {
}

@Override
public final ContentType getContentType() {
public final @NotNull ContentType getContentType() {
return contentType;
}

@Override
public final Header[] getHeaders() {
public final @NotNull Header[] getHeaders() {
return headers;
}

@Override
public final String getBaseUrl() {
return baseUrl;
public final @NotNull String getUrl() {
return url;
}

@Override
public final @NotNull String getBaseUrl() {
return getUrl();
}

@Override
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/ai/preferred/venom/response/Response.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,21 @@ public interface Response {
@NotNull
Header[] getHeaders();

/**
* Returns the url used to fetch the response, if the request
* is redirected, this will be the final requested url.
*
* @return stripped down version of requested url
*/
@NotNull
String getUrl();

/**
* Returns the base form of the url used in this request.
*
* @return stripped down version of requested url
*/
@Deprecated
@NotNull
String getBaseUrl();

Expand Down
27 changes: 17 additions & 10 deletions src/main/java/ai/preferred/venom/response/StorageResponse.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import org.apache.http.HttpHost;
import org.apache.http.entity.ContentType;

import javax.validation.constraints.NotNull;


/**
* @author Ween Jiann Lee
Expand All @@ -33,19 +35,19 @@ public class StorageResponse implements Response, Retrievable {
private final Record<?> record;

/**
* The base url of this response.
* The url of this response.
*/
private final String baseUrl;
private final String url;

/**
* Constructs a base response.
*
* @param record record holding this response
* @param baseUrl base URL of the response
* @param record record holding this response
* @param url base URL of the response
*/
public StorageResponse(final Record<?> record, final String baseUrl) {
public StorageResponse(final Record<?> record, final String url) {
this.record = record;
this.baseUrl = baseUrl;
this.url = url;
}

@Override
Expand All @@ -59,18 +61,23 @@ public final byte[] getContent() {
}

@Override
public final ContentType getContentType() {
public final @NotNull ContentType getContentType() {
return record.getContentType();
}

@Override
public final Header[] getHeaders() {
public final @NotNull Header[] getHeaders() {
return record.getResponseHeaders();
}

@Override
public final String getBaseUrl() {
return baseUrl;
public final @NotNull String getUrl() {
return url;
}

@Override
public final @NotNull String getBaseUrl() {
return getUrl();
}

@Override
Expand Down
18 changes: 12 additions & 6 deletions src/main/java/ai/preferred/venom/response/VResponse.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import javax.validation.constraints.NotNull;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

Expand Down Expand Up @@ -62,18 +63,23 @@ public final byte[] getContent() {
}

@Override
public final ContentType getContentType() {
public final @NotNull ContentType getContentType() {
return getInner().getContentType();
}

@Override
public final Header[] getHeaders() {
public final @NotNull Header[] getHeaders() {
return getInner().getHeaders();
}

@Override
public final String getBaseUrl() {
return getInner().getBaseUrl();
public final @NotNull String getUrl() {
return getInner().getUrl();
}

@Override
public final @NotNull String getBaseUrl() {
return getInner().getUrl();
}

@Override
Expand Down Expand Up @@ -110,7 +116,7 @@ public final String getHtml(final Charset charset) {
* @return jsoup document of response
*/
public final Document getJsoup() {
return Jsoup.parse(getHtml(), getBaseUrl());
return Jsoup.parse(getHtml(), getUrl());
}

/**
Expand All @@ -120,7 +126,7 @@ public final Document getJsoup() {
* @return jsoup document of response
*/
public final Document getJsoup(final Charset charset) {
return Jsoup.parse(getHtml(charset), getBaseUrl());
return Jsoup.parse(getHtml(charset), getUrl());
}

@Override
Expand Down
1 change: 1 addition & 0 deletions src/main/java/ai/preferred/venom/utils/UrlUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ private UrlUtil() {
* @return base URL string
* @throws URISyntaxException if not a proper URL
*/
@Deprecated
public static String getBaseUrl(final Request request) throws URISyntaxException {
final URI uri = new URI(request.getUrl());
final URI baseUri = new URI(uri.getScheme(), null, uri.getHost(), uri.getPort(), uri.getPath(), null, null);
Expand Down
Loading

0 comments on commit 2f6a1f7

Please sign in to comment.