forked from delta-io/delta
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Flink SQL/Catalog Support (delta-io#555)
* [FlinkSQL_PR_1] Flink Delta Sink - Table API UPDATED (delta-io#389) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> Signed-off-by: Krzysztof Chmielewski <krzysztof.chmielewski@getindata.com> Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> Co-authored-by: Paweł Kubit <pawel.kubit@getindata.com> Co-authored-by: Krzysztof Chmielewski <krzysztof.chmielewski@getindata.com> * [FlinkSQL_PR_2] - SQL Support for Delta Source connector. (delta-io#487) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_3] - Delta catalog skeleton (delta-io#503) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_4] - Delta catalog - Interactions with DeltaLog. Create and get table. (delta-io#506) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_5] - Delta catalog - DDL option validation. (delta-io#509) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_6] - Delta catalog - alter table + tests. (delta-io#510) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_7] - Delta catalog - Restrict Delta Table factory to work only with Delta Catalog + tests. (delta-io#514) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_8] - Delta Catalog - DDL/Query hint validation + tests. (delta-io#520) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_9] - Delta Catalog - Adding Flink's Hive catalog as decorated catalog. (delta-io#524) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_10] - Table API support SELECT with filter on partition column. (delta-io#528) * [FlinkSQL_PR_10] - Table API support SELECT with filter on partition column. --------- Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> Co-authored-by: Scott Sandre <scott.sandre@databricks.com> * [FlinkSQL_PR_11] - Delta Catalog - cache DeltaLog instances in DeltaCatalog. (delta-io#529) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_12] - UML diagrams. (delta-io#530) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_13] - Remove mergeSchema option from SQL API. (delta-io#531) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * [FlinkSQL_PR_14] - SQL examples. (delta-io#535) Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> * remove duplicate function after rebasing against master --------- Signed-off-by: Krzysztof Chmielewski <krzysiek.chmielewski@gmail.com> Signed-off-by: Krzysztof Chmielewski <krzysztof.chmielewski@getindata.com> Co-authored-by: kristoffSC <krzysiek.chmielewski@gmail.com> Co-authored-by: Paweł Kubit <pawel.kubit@getindata.com> Co-authored-by: Krzysztof Chmielewski <krzysztof.chmielewski@getindata.com>
- Loading branch information
Showing
97 changed files
with
11,242 additions
and
181 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
...ink-example/src/main/java/org/example/sql/StreamingApiDeltaSourceToTableDeltaSinkJob.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package org.example.sql; | ||
|
||
import java.util.UUID; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import io.delta.flink.source.DeltaSource; | ||
import org.apache.flink.api.common.eventtime.WatermarkStrategy; | ||
import org.apache.flink.core.fs.Path; | ||
import org.apache.flink.streaming.api.datastream.DataStreamSource; | ||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; | ||
import org.apache.flink.table.api.Table; | ||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; | ||
import org.apache.flink.table.data.RowData; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.utils.Utils; | ||
import static org.utils.job.sql.SqlExampleBase.createTableStreamingEnv; | ||
import static org.utils.job.sql.SqlExampleBase.createTestStreamEnv; | ||
|
||
/** | ||
* This is an example of using Delta Connector both in Streaming and Table API. In this example a | ||
* Delta Source will be created using Streaming API and will be registered as Flink table. Next we | ||
* will use Flink SQL to read data from it using SELECT statement and write back to newly created | ||
* Delta table defined by CREATE TABLE statement. | ||
*/ | ||
public class StreamingApiDeltaSourceToTableDeltaSinkJob { | ||
|
||
private static final String SOURCE_TABLE_PATH = Utils.resolveExampleTableAbsolutePath( | ||
"data/source_table_no_partitions"); | ||
|
||
private static final String SINK_TABLE_PATH = Utils.resolveExampleTableAbsolutePath( | ||
"example_streamingToTableAPI_table_" + UUID.randomUUID().toString().split("-")[0]); | ||
|
||
public static void main(String[] args) throws Exception { | ||
StreamExecutionEnvironment streamEnv = createTestStreamEnv(false); // isStreaming = false | ||
StreamTableEnvironment tableEnv = createTableStreamingEnv(streamEnv); | ||
createPipeline(streamEnv, tableEnv); | ||
} | ||
|
||
private static void createPipeline( | ||
StreamExecutionEnvironment streamEnv, | ||
StreamTableEnvironment tableEnv) throws Exception { | ||
|
||
// Set up a Delta Source using Flink's Streaming API. | ||
DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData( | ||
new Path(SOURCE_TABLE_PATH), | ||
new Configuration() | ||
).build(); | ||
|
||
// create a source stream from Delta Source connector. | ||
DataStreamSource<RowData> sourceStream = | ||
streamEnv.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); | ||
|
||
// setup Delta Catalog | ||
tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); | ||
tableEnv.executeSql("USE CATALOG myDeltaCatalog"); | ||
|
||
// Convert source stream into Flink's table and register it as temporary view under | ||
// "InputTable" name. | ||
Table sourceTable = tableEnv.fromDataStream(sourceStream); | ||
tableEnv.createTemporaryView("InputTable", sourceTable); | ||
|
||
// Create Sink Delta table using Flink SQL API. | ||
tableEnv.executeSql(String.format("" | ||
+ "CREATE TABLE sinkTable (" | ||
+ "f1 STRING," | ||
+ "f2 STRING," | ||
+ "f3 INT" | ||
+ ") WITH (" | ||
+ " 'connector' = 'delta'," | ||
+ " 'table-path' = '%s'" | ||
+ ")", | ||
SINK_TABLE_PATH) | ||
); | ||
|
||
// Insert into sinkTable all rows read by Delta Source that is registered as "InputTable" | ||
// view. | ||
tableEnv.executeSql("INSERT INTO sinkTable SELECT * FROM InputTable") | ||
.await(10, TimeUnit.SECONDS); | ||
|
||
// Read and print all rows from sinkTable using Flink SQL. | ||
tableEnv.executeSql("SELECT * FROM sinkTable").print(); | ||
} | ||
} |
52 changes: 52 additions & 0 deletions
52
examples/flink-example/src/main/java/org/example/sql/insert/InsertTableExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package org.example.sql.insert; | ||
|
||
import java.util.UUID; | ||
|
||
import org.apache.flink.table.api.Table; | ||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; | ||
import org.utils.Utils; | ||
import org.utils.job.sql.SqlSinkExampleBase; | ||
|
||
/** | ||
* This is an example of executing a INSERT query on Delta Table using Flink SQL. | ||
*/ | ||
public class InsertTableExample extends SqlSinkExampleBase { | ||
|
||
static String TABLE_PATH = Utils.resolveExampleTableAbsolutePath( | ||
"example_table_" + UUID.randomUUID().toString().split("-")[0]); | ||
|
||
public static void main(String[] args) | ||
throws Exception { | ||
new InsertTableExample().run(TABLE_PATH); | ||
} | ||
|
||
@Override | ||
protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { | ||
|
||
// setup Delta Catalog | ||
tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); | ||
tableEnv.executeSql("USE CATALOG myDeltaCatalog"); | ||
|
||
// SQL definition for Delta Table where we will insert rows. | ||
tableEnv.executeSql(String.format("" | ||
+ "CREATE TABLE sinkTable (" | ||
+ "f1 STRING," | ||
+ "f2 STRING," | ||
+ "f3 INT" | ||
+ ") WITH (" | ||
+ " 'connector' = 'delta'," | ||
+ " 'table-path' = '%s'" | ||
+ ")", | ||
tablePath) | ||
); | ||
|
||
// A SQL query that inserts three rows (three columns per row) into sinkTable. | ||
tableEnv.executeSql("" | ||
+ "INSERT INTO sinkTable VALUES " | ||
+ "('a', 'b', 1)," | ||
+ "('c', 'd', 2)," | ||
+ "('e', 'f', 3)" | ||
); | ||
return null; | ||
} | ||
} |
44 changes: 44 additions & 0 deletions
44
...flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package org.example.sql.select.bounded; | ||
|
||
import org.apache.flink.table.api.Table; | ||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; | ||
import org.utils.Utils; | ||
import org.utils.job.sql.BoundedSqlSourceExampleBase; | ||
|
||
/** | ||
* This is an example of executing a bounded SELECT query on Delta Table using Flink SQL. | ||
*/ | ||
public class SelectBoundedTableExample extends BoundedSqlSourceExampleBase { | ||
|
||
private static final String TABLE_PATH = | ||
Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); | ||
|
||
public static void main(String[] args) throws Exception { | ||
new SelectBoundedTableExample().run(TABLE_PATH); | ||
} | ||
|
||
@Override | ||
protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { | ||
|
||
// setup Delta Catalog | ||
tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); | ||
tableEnv.executeSql("USE CATALOG myDeltaCatalog"); | ||
|
||
// SQL definition for Delta Table where we will insert rows. | ||
tableEnv.executeSql(String.format("" | ||
+ "CREATE TABLE sourceTable (" | ||
+ "f1 STRING," | ||
+ "f2 STRING," | ||
+ "f3 INT" | ||
+ ") WITH (" | ||
+ " 'connector' = 'delta'," | ||
+ " 'table-path' = '%s'" | ||
+ ")", | ||
tablePath) | ||
); | ||
|
||
// A batch SQL query that fetches all columns from sourceTable. The batch mode is a | ||
// default mode for SQL queries on Delta Table. | ||
return tableEnv.sqlQuery("SELECT * FROM sourceTable"); | ||
} | ||
} |
45 changes: 45 additions & 0 deletions
45
...le/src/main/java/org/example/sql/select/bounded/SelectBoundedTableVersionAsOfExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package org.example.sql.select.bounded; | ||
|
||
import org.apache.flink.table.api.Table; | ||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; | ||
import org.utils.Utils; | ||
import org.utils.job.sql.BoundedSqlSourceExampleBase; | ||
|
||
/** | ||
* This is an example of executing a bounded SELECT query on Delta Table using Flink SQL | ||
* that will read Delta table from version specified by `versionAsOf` option. | ||
*/ | ||
public class SelectBoundedTableVersionAsOfExample extends BoundedSqlSourceExampleBase { | ||
|
||
private static final String TABLE_PATH = | ||
Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); | ||
|
||
public static void main(String[] args) throws Exception { | ||
new SelectBoundedTableVersionAsOfExample().run(TABLE_PATH); | ||
} | ||
|
||
@Override | ||
protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { | ||
|
||
// setup Delta Catalog | ||
tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); | ||
tableEnv.executeSql("USE CATALOG myDeltaCatalog"); | ||
|
||
// SQL definition for Delta Table where we will insert rows. | ||
tableEnv.executeSql(String.format("" | ||
+ "CREATE TABLE sourceTable (" | ||
+ "f1 STRING," | ||
+ "f2 STRING," | ||
+ "f3 INT" | ||
+ ") WITH (" | ||
+ " 'connector' = 'delta'," | ||
+ " 'table-path' = '%s'" | ||
+ ")", | ||
tablePath) | ||
); | ||
|
||
// A SQL query that fetches all columns from sourceTable starting from Delta version 1. | ||
// This query runs in batch mode which is a default mode for SQL queries on Delta Table. | ||
return tableEnv.sqlQuery("SELECT * FROM sourceTable /*+ OPTIONS('versionAsOf' = '1') */"); | ||
} | ||
} |
44 changes: 44 additions & 0 deletions
44
...example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package org.example.sql.select.continuous; | ||
|
||
import org.apache.flink.table.api.Table; | ||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; | ||
import org.utils.Utils; | ||
import org.utils.job.sql.ContinuousSqlSourceExampleBase; | ||
|
||
/** | ||
* This is an example of executing a continuous SELECT query on Delta Table using Flink SQL. | ||
*/ | ||
public class SelectContinuousTableExample extends ContinuousSqlSourceExampleBase { | ||
|
||
private static final String TABLE_PATH = | ||
Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); | ||
|
||
public static void main(String[] args) throws Exception { | ||
new SelectContinuousTableExample().run(TABLE_PATH); | ||
} | ||
|
||
@Override | ||
protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { | ||
|
||
// setup Delta Catalog | ||
tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); | ||
tableEnv.executeSql("USE CATALOG myDeltaCatalog"); | ||
|
||
// SQL definition for Delta Table where we will insert rows. | ||
tableEnv.executeSql(String.format("" | ||
+ "CREATE TABLE sourceTable (" | ||
+ "f1 STRING," | ||
+ "f2 STRING," | ||
+ "f3 INT" | ||
+ ") WITH (" | ||
+ " 'connector' = 'delta'," | ||
+ " 'table-path' = '%s'" | ||
+ ")", | ||
tablePath) | ||
); | ||
|
||
// A SQL query that fetches all columns from sourceTable. | ||
// This query runs in continuous mode. | ||
return tableEnv.sqlQuery("SELECT * FROM sourceTable /*+ OPTIONS('mode' = 'streaming') */"); | ||
} | ||
} |
Oops, something went wrong.