DRILL-8239: Convert JSON UDF to EVF #2567

Draft · wants to merge 10 commits into base: master
Changes from all commits
JsonConvertFrom.java (@@ -18,153 +18,138 @@). The four JsonReader-based variants (ConvertFromJson, ConvertFromJsonVarchar, ConvertFromJsonNullableInput, ConvertFromJsonVarcharNullableInput), which wrote through a ComplexWriter into a DrillBuf, are replaced by two EVF-based functions. The file after this change:

package org.apache.drill.exec.expr.fn.impl.conv;

import javax.inject.Inject;

import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.expr.holders.NullableVarBinaryHolder;
import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;

@SuppressWarnings("unused")
public class JsonConvertFrom {

  private JsonConvertFrom() {}

  @FunctionTemplate(name = "convert_fromJSON",
      scope = FunctionScope.SIMPLE, nulls = NullHandling.INTERNAL)
  public static class ConvertFromJsonNullableInput implements DrillSimpleFunc {

    @Param
    NullableVarBinaryHolder in;

    @Output // TODO Remove in future work
    BaseWriter.ComplexWriter writer;

    @Inject
    OptionManager options;

    @Inject
    ResultSetLoader rsLoader;

    @Workspace
    org.apache.drill.exec.store.easy.json.loader.ClosingStreamIterator streamIter;

    @Workspace
    org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl jsonLoader;

    @Override
    public void setup() {
      streamIter = new org.apache.drill.exec.store.easy.json.loader.ClosingStreamIterator();
      rsLoader.startBatch();
    }

    @Override
    public void eval() {
      // If the input is null or empty, return an empty map
      if (in.isSet == 0 || in.start == in.end) {
        return;
      }

      java.io.InputStream inputStream = org.apache.drill.exec.vector.complex.fn.DrillBufInputStream.getStream(in.start, in.end, in.buffer);

      try {
        streamIter.setValue(inputStream);

        // Create the JsonLoader once and reuse it for every row.
        if (jsonLoader == null) {
          jsonLoader = org.apache.drill.exec.expr.fn.impl.conv.JsonConverterUtils.createJsonLoader(rsLoader, options, streamIter);
        }

        org.apache.drill.exec.physical.resultSet.RowSetLoader rowWriter = rsLoader.writer();
        rowWriter.start();
        if (jsonLoader.parser().next()) {
          rowWriter.save();
        }
      } catch (Exception e) {
        throw org.apache.drill.common.exceptions.UserException.dataReadError(e)
            .message("Error while reading JSON. ")
            .addContext(e.getMessage())
            .build();
      }
    }
  }

  @FunctionTemplate(name = "convert_fromJSON",
      scope = FunctionScope.SIMPLE, nulls = NullHandling.INTERNAL)
  public static class ConvertFromJsonVarcharInput implements DrillSimpleFunc {

    @Param
    NullableVarCharHolder in;

    @Output // TODO Remove in future work
    ComplexWriter writer;

    @Workspace
    org.apache.drill.exec.store.easy.json.loader.ClosingStreamIterator streamIter;

    @Inject
    OptionManager options;

    @Inject
    ResultSetLoader rsLoader;

    @Workspace
    org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl jsonLoader;

    @Override
    public void setup() {
      streamIter = new org.apache.drill.exec.store.easy.json.loader.ClosingStreamIterator();
      rsLoader.startBatch();
    }

    @Override
    public void eval() {
      // If the input is null or empty, return an empty map
      if (in.isSet == 0 || in.start == in.end) {
        return;
      }

      java.io.InputStream inputStream = org.apache.drill.exec.vector.complex.fn.DrillBufInputStream.getStream(in.start, in.end, in.buffer);

      try {
        streamIter.setValue(inputStream);
        if (jsonLoader == null) {
          jsonLoader = org.apache.drill.exec.expr.fn.impl.conv.JsonConverterUtils.createJsonLoader(rsLoader, options, streamIter);
        }
        org.apache.drill.exec.physical.resultSet.RowSetLoader rowWriter = rsLoader.writer();
        rowWriter.start();
        if (jsonLoader.parser().next()) {
          rowWriter.save();
        }
      } catch (Exception e) {
        throw org.apache.drill.common.exceptions.UserException.dataReadError(e)
            .message("Error while reading JSON. ")
            .addContext(e.getMessage())
            .build();
      }
    }
  }
}
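For a sense of how the converted UDF is invoked, the SQL below (drawn from the tests later in this PR) reads a JSON literal into a map and projects two of its fields:

SELECT json_data['foo'] AS foo, json_data['num'] AS num
FROM (SELECT convert_fromJSON('{"foo":"bar", "num":10}') AS json_data
      FROM (VALUES(1)))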
JsonConverterUtils.java (new file, @@ -0,0 +1,49 @@):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.drill.exec.expr.fn.impl.conv;

import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.store.easy.json.loader.ClosingStreamIterator;
import org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl;
import org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl.JsonLoaderBuilder;

public class JsonConverterUtils {

  private JsonConverterUtils() {}

  /**
   * Creates a {@link JsonLoaderImpl} for use in JSON conversion UDFs.
   * @param rsLoader The {@link ResultSetLoader} used in the UDF
   * @param options The {@link OptionManager} used in the UDF, from which the
   *                global JSON options are read
   * @param stream An iterator over input streams containing the JSON data
   * @return A {@link JsonLoaderImpl} for use in the UDF
   */
  public static JsonLoaderImpl createJsonLoader(ResultSetLoader rsLoader,
      OptionManager options,
      ClosingStreamIterator stream) {
    // Configure the loader with the global JSON options taken from the
    // session's OptionManager.
    JsonLoaderBuilder jsonLoaderBuilder = new JsonLoaderBuilder()
        .resultSetLoader(rsLoader)
        .standardOptions(options)
        .fromStream(() -> stream);

    return (JsonLoaderImpl) jsonLoaderBuilder.build();
  }
}
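A minimal sketch of how this helper is driven, mirroring the eval() bodies above. Here rsLoader and options stand for the ResultSetLoader and OptionManager that Drill injects into the UDF, and the JSON literal is purely illustrative:

// Assumed context: rsLoader (ResultSetLoader) and options (OptionManager)
// are supplied by the Drill UDF framework via @Inject.
ClosingStreamIterator streamIter = new ClosingStreamIterator();
rsLoader.startBatch();

// Hand the loader one JSON value as a stream; the loader itself is created
// once and reused for subsequent values.
streamIter.setValue(new java.io.ByteArrayInputStream(
    "{\"a\": 1}".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
JsonLoaderImpl jsonLoader = JsonConverterUtils.createJsonLoader(rsLoader, options, streamIter);

// Parse one JSON value and save it as one row through the EVF row writer.
org.apache.drill.exec.physical.resultSet.RowSetLoader rowWriter = rsLoader.writer();
rowWriter.start();
if (jsonLoader.parser().next()) {
  rowWriter.save();
}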
TestJsonConversionUDF.java (new file, @@ -0,0 +1,92 @@):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.drill.exec.store.json;

import ch.qos.logback.classic.Level;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.physical.impl.project.ProjectRecordBatch;
import org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.physical.rowSet.RowSetBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterTest;
import org.apache.drill.test.LogFixture;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.junit.Assert.assertEquals;

public class TestJsonConversionUDF extends ClusterTest {

  private static final Level CURRENT_LOG_LEVEL = Level.DEBUG;
  protected static LogFixture logFixture;

  @BeforeClass
  public static void setup() throws Exception {
    logFixture = LogFixture.builder()
        .toConsole()
        .logger(ProjectRecordBatch.class, CURRENT_LOG_LEVEL)
        .logger(JsonLoaderImpl.class, CURRENT_LOG_LEVEL)
        .logger(IteratorValidatorBatchIterator.class, CURRENT_LOG_LEVEL)
        .build();

    startCluster(ClusterFixture.builder(dirTestWatcher));
  }

  @Test
  public void testConvertFromJsonVarBinary() throws Exception {
    client.alterSession(ExecConstants.JSON_READER_NAN_INF_NUMBERS, true);
    try {
      String sql = "SELECT string_binary(convert_toJSON(convert_fromJSON(columns[1]))) AS col FROM cp.`jsoninput/nan_test.csv`";
      RowSet results = client.queryBuilder().sql(sql).rowSet();
      assertEquals("Query result must contain 1 row", 1, results.rowCount());
      results.clear();
    } finally {
      // Restore the session option so other tests are unaffected.
      client.resetSession(ExecConstants.JSON_READER_NAN_INF_NUMBERS);
    }
  }

  @Test
  public void testConvertFromJsonVarChar() throws Exception {
    String sql = "SELECT json_data['foo'] AS foo, json_data['num'] AS num FROM " +
        "(SELECT convert_fromJSON('{\"foo\":\"bar\", \"num\":10}') AS json_data FROM (VALUES(1)))";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
        .addNullable("foo", MinorType.VARCHAR)
        .addNullable("num", MinorType.BIGINT)
        .buildSchema();

    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
        .addRow("bar", 10L)
        .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  @Test
  public void testMultipleRows() throws Exception {
    String sql = "SELECT string_binary(convert_toJSON(`name`)) FROM cp.`jsoninput/multirow.csvh`";

    RowSet results = client.queryBuilder().sql(sql).rowSet();
    results.print();
    // Release the buffers held by the row set; this test currently only
    // verifies that the query runs.
    results.clear();
  }
}