Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-http plugin #59

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
423 changes: 423 additions & 0 deletions docs/HTTP-transform.md

Large diffs are not rendered by default.

Binary file added icons/HTTP-transform.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
<gson.version>2.8.5</gson.version>
<hadoop.version>2.3.0</hadoop.version>
<httpcomponents.version>4.5.9</httpcomponents.version>
<hydrator.version>2.4.0-SNAPSHOT</hydrator.version>
<hydrator.version>2.6.0</hydrator.version>
<jackson.version>2.9.9</jackson.version>
<junit.version>4.11</junit.version>
<jython.version>2.7.1</jython.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
*/
public class HttpRecordReader extends RecordReader<NullWritable, BasePage> {
private static final Logger LOG = LoggerFactory.getLogger(HttpRecordReader.class);
private static final Gson gson = new GsonBuilder().create();
protected static final Gson GSON = new GsonBuilder().create();

private BaseHttpPaginationIterator httpPaginationIterator;
protected BaseHttpPaginationIterator httpPaginationIterator;
private BasePage value;

/**
Expand All @@ -51,7 +51,7 @@ public class HttpRecordReader extends RecordReader<NullWritable, BasePage> {
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
Configuration conf = taskAttemptContext.getConfiguration();
String configJson = conf.get(HttpInputFormatProvider.PROPERTY_CONFIG_JSON);
HttpBatchSourceConfig httpBatchSourceConfig = gson.fromJson(configJson, HttpBatchSourceConfig.class);
HttpBatchSourceConfig httpBatchSourceConfig = GSON.fromJson(configJson, HttpBatchSourceConfig.class);
httpPaginationIterator = PaginationIteratorFactory.createInstance(httpBatchSourceConfig, null);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -671,8 +671,16 @@ public List<String> getTransportProtocolsList() {
}

public void validate() {
validate(true, true);
}

public void validate(boolean validateURL) {
validate(validateURL, true);
}

public void validate(boolean validateURL, boolean validateErrorHandling) {
// Validate URL
if (!containsMacro(PROPERTY_URL)) {
if (validateURL && !containsMacro(PROPERTY_URL)) {
try {
// the placeholder is only substituted during pagination, so replace it with an arbitrary value to validate the URL
new URI(getUrl().replaceAll(PAGINATION_INDEX_PLACEHOLDER_REGEX, "0"));
Expand All @@ -683,7 +691,7 @@ public void validate() {
}

// Validate HTTP Error Handling Map
if (!containsMacro(PROPERTY_HTTP_ERROR_HANDLING)) {
if (validateErrorHandling && !containsMacro(PROPERTY_HTTP_ERROR_HANDLING)) {
List<HttpErrorHandlerEntity> httpErrorsHandlingEntries = getHttpErrorHandlingEntries();
boolean supportsSkippingPages = PaginationIteratorFactory
.createInstance(this, null).supportsSkippingPages();
Expand Down Expand Up @@ -774,9 +782,7 @@ PAGINATION_INDEX_PLACEHOLDER, getPaginationType()),
String reasonFormat = String.format("page format is '%s'", getFormat());

if (getFormat().equals(PageFormat.JSON) || getFormat().equals(PageFormat.XML)) {
if (!getFormat().equals(PageFormat.JSON)) {
assertIsSet(getResultPath(), PROPERTY_RESULT_PATH, reasonFormat);
}
assertIsSet(getResultPath(), PROPERTY_RESULT_PATH, reasonFormat);
getFullFieldsMapping(); // can be null, but call getter to verify correctness of regexps
} else {
assertIsNotSet(getResultPath(), PROPERTY_RESULT_PATH, reasonFormat);
Expand Down Expand Up @@ -861,15 +867,20 @@ public static List<String> getListFromString(String value) {
}

public static Map<String, String> getMapFromKeyValueString(String keyValueString) {
return getMapFromKeyValueString(keyValueString, ",", ":");
}

public static Map<String, String> getMapFromKeyValueString(String keyValueString, String delimiter,
String kvDelimiter) {
Map<String, String> result = new LinkedHashMap<>();

if (Strings.isNullOrEmpty(keyValueString)) {
return result;
}

String[] mappings = keyValueString.split(",");
String[] mappings = keyValueString.split(delimiter);
for (String map : mappings) {
String[] columns = map.split(":");
String[] columns = map.split(kvDelimiter);
result.put(columns[0], columns[1]);
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ public abstract class BaseHttpPaginationIterator implements Iterator<BasePage>,
private Integer httpStatusCode;
private HttpResponse response;

public BaseHttpPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
public BaseHttpPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state, HttpClient httpClient) {
this.config = config;
this.httpClient = new HttpClient(config);
this.httpClient = httpClient;
this.nextPageUrl = config.getUrl();
this.httpErrorHandler = new HttpErrorHandler(config);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;
Expand All @@ -32,8 +33,8 @@
public class CustomPaginationIterator extends BaseHttpPaginationIterator {
private final JythonPythonExecutor pythonExecutor;

public CustomPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
public CustomPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state, HttpClient httpClient) {
super(config, state, httpClient);
pythonExecutor = new JythonPythonExecutor(config.getCustomPaginationCode());
pythonExecutor.initialize();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.IndexPaginationIteratorState;
Expand All @@ -36,8 +37,9 @@ public class IncrementAnIndexPaginationIterator extends BaseHttpPaginationIterat

private Long index;

public IncrementAnIndexPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
public IncrementAnIndexPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state,
HttpClient httpClient) {
super(config, state, httpClient);
this.indexIncrement = config.getIndexIncrement();
this.maxIndex = config.getMaxIndex();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;
Expand All @@ -34,10 +35,13 @@ public class LinkInResponseBodyPaginationIterator extends BaseHttpPaginationIter

private final String address;

public LinkInResponseBodyPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
boolean isMultiQuery;
public LinkInResponseBodyPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state,
HttpClient httpClient, boolean isMultiQuery) {
super(config, state, httpClient);
URI uri = URI.create(config.getUrl());
this.address = uri.getScheme() + "://" + uri.getAuthority();
this.isMultiQuery = isMultiQuery;
}

@Override
Expand All @@ -62,6 +66,6 @@ protected String getNextPageUrl(HttpResponse response, BasePage page) {

@Override
public boolean supportsSkippingPages() {
return false;
return isMultiQuery;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;
Expand All @@ -34,8 +35,9 @@ public class LinkInResponseHeaderPaginationIterator extends BaseHttpPaginationIt
private static final Logger LOG = LoggerFactory.getLogger(LinkInResponseHeaderPaginationIterator.class);
private static final Pattern nextLinkPattern = Pattern.compile("<(.+)>; rel=next");

public LinkInResponseHeaderPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
public LinkInResponseHeaderPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state,
HttpClient httpClient) {
super(config, state, httpClient);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;
Expand All @@ -28,8 +29,12 @@
public class NonePaginationIterator extends BaseHttpPaginationIterator {
private static final Logger LOG = LoggerFactory.getLogger(NonePaginationIterator.class);

public NonePaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
boolean isMultiQuery;

public NonePaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state, HttpClient httpClient,
boolean isMultiQuery) {
super(config, state, httpClient);
this.isMultiQuery = isMultiQuery;
}

@Override
Expand All @@ -39,6 +44,6 @@ protected String getNextPageUrl(HttpResponse response, BasePage page) {

@Override
public boolean supportsSkippingPages() {
return false;
return isMultiQuery;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,42 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;

/**
* A factory which creates an instance of {@link BaseHttpPaginationIterator} in accordance with the pagination type
* configured in the input config.
*/
public class PaginationIteratorFactory {
public static BaseHttpPaginationIterator createInstance(BaseHttpSourceConfig config, PaginationIteratorState state) {
public static BaseHttpPaginationIterator createInstance(BaseHttpSourceConfig config, PaginationIteratorState state,
boolean isMultiQuery,
HttpClient httpClient) {
switch (config.getPaginationType()) {
case NONE:
return new NonePaginationIterator(config, state);
return new NonePaginationIterator(config, state, httpClient, isMultiQuery);
case LINK_IN_RESPONSE_HEADER:
return new LinkInResponseHeaderPaginationIterator(config, state);
return new LinkInResponseHeaderPaginationIterator(config, state, httpClient);
case LINK_IN_RESPONSE_BODY:
return new LinkInResponseBodyPaginationIterator(config, state);
return new LinkInResponseBodyPaginationIterator(config, state, httpClient, isMultiQuery);
case TOKEN_IN_RESPONSE_BODY:
return new TokenPaginationIterator(config, state);
return new TokenPaginationIterator(config, state, httpClient, isMultiQuery);
case INCREMENT_AN_INDEX:
return new IncrementAnIndexPaginationIterator(config, state);
return new IncrementAnIndexPaginationIterator(config, state, httpClient);
case CUSTOM:
return new CustomPaginationIterator(config, state);
return new CustomPaginationIterator(config, state, httpClient);
default:
throw new IllegalArgumentException(
String.format("Unsupported pagination type: '%s'", config.getPaginationType()));
String.format("Unsupported pagination type: '%s'", config.getPaginationType()));
}
}

public static BaseHttpPaginationIterator createInstance(BaseHttpSourceConfig config, PaginationIteratorState state,
HttpClient httpClient) {
return createInstance(config, state, false, httpClient);
}

public static BaseHttpPaginationIterator createInstance(BaseHttpSourceConfig config, PaginationIteratorState state) {
return createInstance(config, state, false, new HttpClient(config));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package io.cdap.plugin.http.source.common.pagination;

import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;
import io.cdap.plugin.http.source.common.pagination.state.PaginationIteratorState;
Expand All @@ -33,8 +34,12 @@
public class TokenPaginationIterator extends BaseHttpPaginationIterator {
private static final Logger LOG = LoggerFactory.getLogger(TokenPaginationIterator.class);

public TokenPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state) {
super(config, state);
boolean isMultiQuery;

public TokenPaginationIterator(BaseHttpSourceConfig config, PaginationIteratorState state, HttpClient httpClient
, boolean isMultiQuery) {
super(config, state, httpClient);
this.isMultiQuery = isMultiQuery;
}

@Override
Expand All @@ -56,6 +61,6 @@ protected String getNextPageUrl(HttpResponse response, BasePage page) {

@Override
public boolean supportsSkippingPages() {
return false;
return isMultiQuery;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright © 2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.http.transform;

import io.cdap.plugin.http.source.common.http.HttpClient;
import io.cdap.plugin.http.source.common.pagination.BaseHttpPaginationIterator;
import io.cdap.plugin.http.source.common.pagination.PaginationIteratorFactory;
import io.cdap.plugin.http.source.common.pagination.page.BasePage;

import java.io.IOException;

/**
 * Reads pages through the {@link BaseHttpPaginationIterator} subclass that
 * {@link PaginationIteratorFactory} selects for the given transform config.
 *
 * <p>The method names mirror the {@code nextKeyValue()} / {@code getCurrentValue()} / {@code close()}
 * trio of a record reader, but this class does not extend one. It implements {@link AutoCloseable}
 * so that callers can manage it with try-with-resources.
 */
public class DynamicHttpRecordReader implements AutoCloseable {
  protected BaseHttpPaginationIterator httpPaginationIterator;
  // Page returned by the most recent successful nextKeyValue() call; null before the first one.
  private BasePage value;

  /**
   * Creates the reader and its underlying pagination iterator.
   *
   * @param dynamicHttpTransformConfig config handed to the pagination iterator factory
   * @param httpClient client handed to the pagination iterator factory
   */
  public DynamicHttpRecordReader(DynamicHttpTransformConfig dynamicHttpTransformConfig, HttpClient httpClient) {
    // No saved pagination state (null); 'true' is the factory's isMultiQuery flag.
    httpPaginationIterator = PaginationIteratorFactory.createInstance(dynamicHttpTransformConfig, null,
                                                                      true, httpClient);
  }

  /**
   * Advances to the next page if the iterator has one.
   *
   * @return {@code true} if a new page was read and is available via {@link #getCurrentValue()},
   *         {@code false} if the iterator is exhausted (the current value is left unchanged)
   */
  public boolean nextKeyValue() {
    if (!httpPaginationIterator.hasNext()) {
      return false;
    }
    value = httpPaginationIterator.next();
    return true;
  }

  /**
   * Returns the page read by the last successful {@link #nextKeyValue()} call.
   *
   * @return the current page, or {@code null} if no page has been read yet
   */
  public BasePage getCurrentValue() {
    return value;
  }

  /**
   * Closes the underlying pagination iterator, if there is one.
   *
   * @throws IOException if closing the iterator fails
   */
  @Override
  public void close() throws IOException {
    // The field is protected and non-final, so guard against a subclass having cleared it.
    if (httpPaginationIterator != null) {
      httpPaginationIterator.close();
    }
  }
}
Loading