Add customized result index in data source etc
This PR:
- Introduces the `spark.flint.datasource.name` parameter for data source specification.
- Enhances data source creation to accept a custom result index, falling back to the default index when none is provided (see the sketch below).
- Includes error details in the async query result response, sourced from the result index.
- Migrates to `org.apache.spark.sql.FlintJob`, following updates in OpenSearch-Spark.
- Populates query status from the result index rather than the EMR-S job status, handling edge cases where a job succeeds but the query or mapping fails.

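For context, a minimal sketch of the new surface area follows. It is illustrative only, not code from this change: the import paths, the Lombok-generated getters on AsyncQueryExecutionResponse, and the result index name "query_execution_result_custom" are assumptions; the connector type and property are borrowed from the tests in this diff purely to show the constructor shape.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.opensearch.sql.datasource.model.DataSourceMetadata;
import org.opensearch.sql.datasource.model.DataSourceType;
import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse;

public class ResultIndexSketch {

  // The fifth constructor argument is the new optional result index; passing null keeps the
  // previous behavior (results are written to the default Spark response buffer index).
  static DataSourceMetadata dataSourceWithCustomResultIndex() {
    return new DataSourceMetadata(
        "my_prometheus",                                            // name
        DataSourceType.PROMETHEUS,                                  // connector
        ImmutableList.of(),                                         // allowedRoles
        ImmutableMap.of("prometheus.uri", "http://localhost:9090"), // properties
        "query_execution_result_custom");                           // custom result index (hypothetical name)
  }

  // Status and error are now read from the result index document rather than the EMR-S job
  // state, so a query that fails inside a "successful" job still surfaces an error message.
  static void printOutcome(AsyncQueryExecutorService service, String queryId) {
    AsyncQueryExecutionResponse response = service.getAsyncQueryResults(queryId);
    if (!"SUCCESS".equals(response.getStatus())) {
      System.err.println("Query " + queryId + " failed: " + response.getError());
    }
  }
}

The fallback itself lives in EmrServerlessClientImplEMR.startJobRun, which substitutes the default Spark response buffer index when the request's result index is null (see that diff further down).
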
Testing done:
1. Manual testing verifying that async queries still work both with and without a custom result index.
2. Added new unit tests.

Signed-off-by: Kaituo Li <kaituo@amazon.com>
kaituo committed Oct 4, 2023
1 parent 9a2151b commit cc6b5a6
Showing 30 changed files with 287 additions and 94 deletions.
@@ -42,16 +42,20 @@ public class DataSourceMetadata {

@JsonProperty private Map<String, String> properties;

@JsonProperty private String resultIndex;

public DataSourceMetadata(
String name,
DataSourceType connector,
List<String> allowedRoles,
Map<String, String> properties) {
Map<String, String> properties,
String resultIndex) {
this.name = name;
this.connector = connector;
this.description = StringUtils.EMPTY;
this.properties = properties;
this.allowedRoles = allowedRoles;
this.resultIndex = resultIndex;
}

public DataSourceMetadata() {
@@ -69,6 +73,7 @@ public static DataSourceMetadata defaultOpenSearchDataSourceMetadata() {
DEFAULT_DATASOURCE_NAME,
DataSourceType.OPENSEARCH,
Collections.emptyList(),
ImmutableMap.of());
ImmutableMap.of(),
null);
}
}
@@ -199,7 +199,8 @@ public Set<DataSourceMetadata> getDataSourceMetadata(boolean isDefaultDataSource
ds.getName(),
ds.getConnectorType(),
Collections.emptyList(),
ImmutableMap.of()))
ImmutableMap.of(),
null))
.collect(Collectors.toSet());
}

@@ -64,7 +64,8 @@ void testIterator() {
dataSource.getName(),
dataSource.getConnectorType(),
Collections.emptyList(),
ImmutableMap.of()))
ImmutableMap.of(),
null))
.collect(Collectors.toSet());
when(dataSourceService.getDataSourceMetadata(true)).thenReturn(dataSourceMetadata);

@@ -32,6 +32,8 @@ public class XContentParserUtils {
public static final String PROPERTIES_FIELD = "properties";
public static final String ALLOWED_ROLES_FIELD = "allowedRoles";

public static final String RESULT_INDEX_FIELD = "resultIndex";

/**
* Convert xcontent parser to DataSourceMetadata.
*
@@ -45,6 +47,7 @@ public static DataSourceMetadata toDataSourceMetadata(XContentParser parser) thr
DataSourceType connector = null;
List<String> allowedRoles = new ArrayList<>();
Map<String, String> properties = new HashMap<>();
String resultIndex = null;
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
@@ -73,14 +76,18 @@ public static DataSourceMetadata toDataSourceMetadata(XContentParser parser) thr
properties.put(key, value);
}
break;
case RESULT_INDEX_FIELD:
resultIndex = parser.textOrNull();
break;
default:
throw new IllegalArgumentException("Unknown field: " + fieldName);
}
}
if (name == null || connector == null) {
throw new IllegalArgumentException("name and connector are required fields.");
}
return new DataSourceMetadata(name, description, connector, allowedRoles, properties);
return new DataSourceMetadata(
name, description, connector, allowedRoles, properties, resultIndex);
}

/**
@@ -122,6 +129,7 @@ public static XContentBuilder convertToXContent(DataSourceMetadata metadata) thr
builder.field(entry.getKey(), entry.getValue());
}
builder.endObject();
builder.field(RESULT_INDEX_FIELD, metadata.getResultIndex());
builder.endObject();
return builder;
}
2 changes: 2 additions & 0 deletions datasources/src/main/resources/datasources-index-mapping.yml
@@ -14,4 +14,6 @@ properties:
keyword:
type: keyword
connector:
type: keyword
resultIndex:
type: keyword
@@ -344,7 +344,8 @@ void testRemovalOfAuthorizationInfo() {
"testDS",
DataSourceType.PROMETHEUS,
Collections.singletonList("prometheus_access"),
properties);
properties,
null);
when(dataSourceMetadataStorage.getDataSourceMetadata("testDS"))
.thenReturn(Optional.of(dataSourceMetadata));

@@ -398,7 +399,8 @@ void testGetRawDataSourceMetadata() {
"testDS",
DataSourceType.PROMETHEUS,
Collections.singletonList("prometheus_access"),
properties);
properties,
null);
when(dataSourceMetadataStorage.getDataSourceMetadata("testDS"))
.thenReturn(Optional.of(dataSourceMetadata));

@@ -31,7 +31,7 @@ public void testConvertToXContent() {
XContentBuilder contentBuilder = XContentParserUtils.convertToXContent(dataSourceMetadata);
String contentString = BytesReference.bytes(contentBuilder).utf8ToString();
Assertions.assertEquals(
"{\"name\":\"testDS\",\"description\":\"\",\"connector\":\"PROMETHEUS\",\"allowedRoles\":[\"prometheus_access\"],\"properties\":{\"prometheus.uri\":\"https://localhost:9090\"}}",
"{\"name\":\"testDS\",\"description\":\"\",\"connector\":\"PROMETHEUS\",\"allowedRoles\":[\"prometheus_access\"],\"properties\":{\"prometheus.uri\":\"https://localhost:9090\"},\"resultIndex\":null}",
contentString);
}

@@ -62,7 +62,8 @@ public void createDataSourceAPITest() {
"prometheus.auth.username",
"username",
"prometheus.auth.password",
"password"));
"password"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
@@ -92,7 +93,8 @@ public void updateDataSourceAPITest() {
"update_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
@@ -104,7 +106,8 @@ public void updateDataSourceAPITest() {
"update_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://randomtest.com:9090"));
ImmutableMap.of("prometheus.uri", "https://randomtest.com:9090"),
null);
Request updateRequest = getUpdateDataSourceRequest(updateDSM);
Response updateResponse = client().performRequest(updateRequest);
Assert.assertEquals(200, updateResponse.getStatusLine().getStatusCode());
@@ -137,7 +140,8 @@ public void deleteDataSourceTest() {
"delete_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
@@ -175,7 +179,8 @@ public void getAllDataSourceTest() {
"get_all_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
@@ -46,7 +46,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
@@ -58,7 +58,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
@@ -46,7 +46,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
@@ -7,6 +7,8 @@

import static org.opensearch.sql.common.setting.Settings.Key.CLUSTER_NAME;
import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG;
import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD;
import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD;

import com.amazonaws.services.emrserverless.model.JobRunState;
import java.security.AccessController;
@@ -15,6 +17,7 @@
import java.util.List;
import java.util.Optional;
import lombok.AllArgsConstructor;
import org.apache.commons.lang3.tuple.Pair;
import org.json.JSONObject;
import org.opensearch.cluster.ClusterName;
import org.opensearch.sql.common.setting.Settings;
@@ -64,17 +67,22 @@ public CreateAsyncQueryResponse createAsyncQuery(
SparkExecutionEngineConfig.toSparkExecutionEngineConfig(
sparkExecutionEngineConfigString));
ClusterName clusterName = settings.getSettingValue(CLUSTER_NAME);
String jobId =
String query = createAsyncQueryRequest.getQuery();
Pair<String, String> jobIdResultIndexPair =
sparkQueryDispatcher.dispatch(
new DispatchQueryRequest(
sparkExecutionEngineConfig.getApplicationId(),
createAsyncQueryRequest.getQuery(),
query,
createAsyncQueryRequest.getLang(),
sparkExecutionEngineConfig.getExecutionRoleARN(),
clusterName.value()));

asyncQueryJobMetadataStorageService.storeJobMetadata(
new AsyncQueryJobMetadata(jobId, sparkExecutionEngineConfig.getApplicationId()));
return new CreateAsyncQueryResponse(jobId);
new AsyncQueryJobMetadata(
jobIdResultIndexPair.getLeft(),
sparkExecutionEngineConfig.getApplicationId(),
jobIdResultIndexPair.getRight()));
return new CreateAsyncQueryResponse(jobIdResultIndexPair.getLeft());
}

@Override
@@ -85,18 +93,24 @@ public AsyncQueryExecutionResponse getAsyncQueryResults(String queryId) {
if (jobMetadata.isPresent()) {
JSONObject jsonObject =
sparkQueryDispatcher.getQueryResponse(
jobMetadata.get().getApplicationId(), jobMetadata.get().getJobId());
if (JobRunState.SUCCESS.toString().equals(jsonObject.getString("status"))) {
jobMetadata.get().getApplicationId(),
jobMetadata.get().getJobId(),
jobMetadata.get().getResultIndex());
if (JobRunState.SUCCESS.toString().equals(jsonObject.getString(STATUS_FIELD))) {
DefaultSparkSqlFunctionResponseHandle sparkSqlFunctionResponseHandle =
new DefaultSparkSqlFunctionResponseHandle(jsonObject);
List<ExprValue> result = new ArrayList<>();
while (sparkSqlFunctionResponseHandle.hasNext()) {
result.add(sparkSqlFunctionResponseHandle.next());
}
return new AsyncQueryExecutionResponse(
JobRunState.SUCCESS.toString(), sparkSqlFunctionResponseHandle.schema(), result);
JobRunState.SUCCESS.toString(), sparkSqlFunctionResponseHandle.schema(), result, null);
} else {
return new AsyncQueryExecutionResponse(jsonObject.getString("status"), null, null);
return new AsyncQueryExecutionResponse(
jsonObject.optString(STATUS_FIELD, JobRunState.FAILED.toString()),
null,
null,
jsonObject.optString(ERROR_FIELD, ""));
}
}
throw new AsyncQueryNotFoundException(String.format("QueryId: %s not found", queryId));
@@ -18,4 +18,5 @@ public class AsyncQueryExecutionResponse {
private final String status;
private final ExecutionEngine.Schema schema;
private final List<ExprValue> results;
private final String error;
}
@@ -26,6 +26,7 @@
public class AsyncQueryJobMetadata {
private String jobId;
private String applicationId;
private String resultIndex;

@Override
public String toString() {
@@ -44,6 +45,7 @@ public static XContentBuilder convertToXContent(AsyncQueryJobMetadata metadata)
builder.startObject();
builder.field("jobId", metadata.getJobId());
builder.field("applicationId", metadata.getApplicationId());
builder.field("resultIndex", metadata.getResultIndex());
builder.endObject();
return builder;
}
@@ -77,6 +79,7 @@ public static AsyncQueryJobMetadata toJobMetadata(String json) throws IOExceptio
public static AsyncQueryJobMetadata toJobMetadata(XContentParser parser) throws IOException {
String jobId = null;
String applicationId = null;
String resultIndex = null;
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
@@ -88,13 +91,16 @@ public static AsyncQueryJobMetadata toJobMetadata(XContentParser parser) throws
case "applicationId":
applicationId = parser.textOrNull();
break;
case "resultIndex":
resultIndex = parser.textOrNull();
break;
default:
throw new IllegalArgumentException("Unknown field: " + fieldName);
}
}
if (jobId == null || applicationId == null) {
throw new IllegalArgumentException("jobId and applicationId are required fields.");
}
return new AsyncQueryJobMetadata(jobId, applicationId);
return new AsyncQueryJobMetadata(jobId, applicationId, resultIndex);
}
}
@@ -11,19 +11,26 @@
public class AsyncQueryResult extends QueryResult {

@Getter private final String status;
@Getter private final String error;

public AsyncQueryResult(
String status,
ExecutionEngine.Schema schema,
Collection<ExprValue> exprValues,
Cursor cursor) {
Cursor cursor,
String error) {
super(schema, exprValues, cursor);
this.status = status;
this.error = error;
}

public AsyncQueryResult(
String status, ExecutionEngine.Schema schema, Collection<ExprValue> exprValues) {
String status,
ExecutionEngine.Schema schema,
Collection<ExprValue> exprValues,
String error) {
super(schema, exprValues);
this.status = status;
this.error = error;
}
}
@@ -34,6 +34,10 @@ public EmrServerlessClientImplEMR(AWSEMRServerless emrServerless) {

@Override
public String startJobRun(StartJobRequest startJobRequest) {
String resultIndex =
startJobRequest.getResultIndex() == null
? SPARK_RESPONSE_BUFFER_INDEX_NAME
: startJobRequest.getResultIndex();
StartJobRunRequest request =
new StartJobRunRequest()
.withName(startJobRequest.getJobName())
@@ -45,8 +49,7 @@ public String startJobRun(StartJobRequest startJobRequest) {
.withSparkSubmit(
new SparkSubmit()
.withEntryPoint(SPARK_SQL_APPLICATION_JAR)
.withEntryPointArguments(
startJobRequest.getQuery(), SPARK_RESPONSE_BUFFER_INDEX_NAME)
.withEntryPointArguments(startJobRequest.getQuery(), resultIndex)
.withSparkSubmitParameters(startJobRequest.getSparkSubmitParams())));
StartJobRunResult startJobRunResult =
AccessController.doPrivileged(
@@ -20,4 +20,5 @@ public class StartJobRequest {
private final String executionRoleArn;
private final String sparkSubmitParams;
private final Map<String, String> tags;
private final String resultIndex;
}