From a6763ead8f9e74a3b5b141dcd05f2723eb87fa04 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 7 Aug 2023 17:00:56 +0800 Subject: [PATCH 01/19] init sort-flink-v1.15-kafka module --- .../main/assemblies/sort-connectors-v1.15.xml | 8 + .../sort-connectors/kafka/pom.xml | 150 ++++++++++++++++++ .../apache/inlong/sort/kafka/KafkaSink.java | 8 + .../apache/inlong/sort/kafka/KafkaSource.java | 4 + .../kafka/table/KafkaDynamicTableFactory.java | 36 +++++ .../table/UpsertKafkaDynamicTableFactory.java | 36 +++++ .../org.apache.flink.table.factories.Factory | 17 ++ .../sort-flink-v1.15/sort-connectors/pom.xml | 1 + licenses/inlong-sort-connectors/LICENSE | 5 + 9 files changed, 265 insertions(+) create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory diff --git a/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml b/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml index c88fd0cd3d5..f28aa2b34e7 100644 --- a/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml +++ b/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml @@ -105,6 +105,14 @@ sort-connector-hudi-v1.15-${project.version}.jar + 0644 + + + ../inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/target + inlong-sort/connectors + + sort-connector-kafka-v1.15-${project.version}.jar + 0644 diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml new file mode 100644 index 00000000000..93ef79ed07b --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -0,0 +1,150 @@ + + + 4.0.0 + + org.apache.inlong + sort-connectors-v1.15 + 1.9.0-SNAPSHOT + + + sort-connector-kafka-v1.15 + jar + Apache InLong - Sort-connector-kafka + + + ${project.parent.parent.parent.parent.parent.basedir} + + + + + org.apache.flink + flink-connector-kafka + + + org.apache.inlong + sort-connector-base + ${project.version} + + + org.apache.inlong + audit-sdk + ${project.version} + compile + + + org.apache.inlong + sort-common + ${project.version} + compile + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-flink + + shade + + package + + + + org.apache.inlong:* + org.apache.kafka:* + com.google.protobuf:* + org.apache.flink:flink-connector-kafka + com.amazonaws:* + com.fasterxml.jackson.core:* + commons-logging:commons-logging + org.apache.httpcomponents:* + software.amazon.ion:* + joda-time:* + org.apache.commons:commons-lang3 + + + + + org.apache.kafka:* + + kafka/kafka-version.properties + LICENSE + + NOTICE + common/** + + + + org.apache.inlong:sort-connector-* + + org/apache/inlong/** + META-INF/services/org.apache.flink.table.factories.Factory + + + + + + 
org.apache.inlong.sort.base + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.base + + + org.apache.kafka + org.apache.flink.kafka.shaded.org.apache.kafka + + + com.amazonaws + org.apache.inlong.sort.kafka.shaded.com.amazonaws + + + com.fasterxml.jackson.core + org.apache.inlong.sort.kafka.shaded.com.fasterxml.jackson.core + + + org.apache.commons.logging + org.apache.inlong.sort.kafka.shaded.org.apache.commons.logging + + + org.apache.http + org.apache.inlong.sort.kafka.shaded.org.apache.http + + + software.amazon.ion + org.apache.inlong.sort.kafka.shaded.software.amazon.ion + + + org.joda.time + org.apache.inlong.sort.kafka.shaded.org.joda.time + + + org.apache.commons.lang3 + org.apache.inlong.sort.kafka.shaded.org.apache.commons.lang3 + + + org.apache.inlong.sort.configuration + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.configuration + + + org.apache.inlong.sort.protocol + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.protocol + + + org.apache.inlong.sort.util + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.util + + + + + + + + + + \ No newline at end of file diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java new file mode 100644 index 00000000000..7a794798e2e --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java @@ -0,0 +1,8 @@ +package org.apache.inlong.sort.kafka; + + + + +public class KafkaSink { + +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java new file mode 100644 index 00000000000..9fbe76cc1e9 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java @@ -0,0 +1,4 @@ +package org.apache.inlong.sort.kafka; + +public class KafkaSource { +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java new file mode 100644 index 00000000000..6a2fef5d5e2 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -0,0 +1,36 @@ +package org.apache.inlong.sort.kafka.table; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.DynamicTableSourceFactory; + +import java.util.Set; + +public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + return null; + } + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + return null; + } + + @Override + public String factoryIdentifier() { + return null; + } + + @Override + public Set> 
requiredOptions() { + return null; + } + + @Override + public Set> optionalOptions() { + return null; + } +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java new file mode 100644 index 00000000000..e4be5275fa1 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -0,0 +1,36 @@ +package org.apache.inlong.sort.kafka.table; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.DynamicTableSourceFactory; + +import java.util.Set; + +public class UpsertKafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + return null; + } + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + return null; + } + + @Override + public String factoryIdentifier() { + return null; + } + + @Override + public Set> requiredOptions() { + return null; + } + + @Override + public Set> optionalOptions() { + return null; + } +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 00000000000..7eeb7cd7a87 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
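# Table factory implementations provided by this connector module: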
+ +org.apache.inlong.sort.kafka.table.KafkaDynamicTableFactory +org.apache.inlong.sort.kafka.table.UpsertKafkaDynamicTableFactory \ No newline at end of file diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml index 38c475a5731..dd789c6bd58 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml @@ -42,6 +42,7 @@ tubemq hbase hudi + kafka diff --git a/licenses/inlong-sort-connectors/LICENSE b/licenses/inlong-sort-connectors/LICENSE index 3ac170a2d3a..9388d367717 100644 --- a/licenses/inlong-sort-connectors/LICENSE +++ b/licenses/inlong-sort-connectors/LICENSE @@ -815,6 +815,11 @@ Source : flink-connector-hbase-2.2 1.15.4 (Please note that the software have been modified.) License : https://github.com/apache/flink/blob/master/LICENSE +1.3.20 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java + inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java + Source : org.apache.flink:flink-connector-kafka:1.15.4 (Please note that the software have been modified.) + License : https://github.com/apache/flink/blob/master/LICENSE + ======================================================================= Apache InLong Subcomponents: From f1b44e4a31493d0d00444c552564f54bccb7f4d3 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 7 Aug 2023 17:06:13 +0800 Subject: [PATCH 02/19] add license --- .../sort-connectors/kafka/pom.xml | 5 ++--- .../apache/inlong/sort/kafka/KafkaSink.java | 20 ++++++++++++++++--- .../apache/inlong/sort/kafka/KafkaSource.java | 17 ++++++++++++++++ .../kafka/table/KafkaDynamicTableFactory.java | 18 +++++++++++++++++ .../table/UpsertKafkaDynamicTableFactory.java | 18 +++++++++++++++++ 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 93ef79ed07b..aafc4373169 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -1,6 +1,5 @@ - 4.0.0 @@ -147,4 +146,4 @@ - \ No newline at end of file + diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java index 7a794798e2e..371ce554657 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java @@ -1,7 +1,21 @@ -package org.apache.inlong.sort.kafka; - - +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.inlong.sort.kafka; public class KafkaSink { diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java index 9fbe76cc1e9..ca742482102 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.inlong.sort.kafka; public class KafkaSource { diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 6a2fef5d5e2..57f5c69a789 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.inlong.sort.kafka.table; import org.apache.flink.configuration.ConfigOption; @@ -9,6 +26,7 @@ import java.util.Set; public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + @Override public DynamicTableSink createDynamicTableSink(Context context) { return null; diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index e4be5275fa1..1d40cba797a 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.inlong.sort.kafka.table; import org.apache.flink.configuration.ConfigOption; @@ -9,6 +26,7 @@ import java.util.Set; public class UpsertKafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + @Override public DynamicTableSink createDynamicTableSink(Context context) { return null; From e23da27e5b57b72cb7f080eed5e342d2103fefd6 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 7 Aug 2023 18:19:27 +0800 Subject: [PATCH 03/19] add license --- .../sort-connectors/kafka/pom.xml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index aafc4373169..33764aefc29 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -1,4 +1,22 @@ + 4.0.0 From 84e31c3fe3b5856c12e167c025d226deca98f643 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 7 Aug 2023 23:42:14 +0800 Subject: [PATCH 04/19] KafkaDynamicTableFactory options --- .../inlong/sort/kafka/KafkaOptions.java | 38 ++++++++++ .../kafka/table/KafkaDynamicTableFactory.java | 76 +++++++++++++++++-- 2 files changed, 106 insertions(+), 8 deletions(-) create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java new file mode 100644 index 00000000000..69909964925 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.kafka; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; + +/** Option utils for Kafka table source sink. 
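+ * Currently it only defines 'sink.ignore.changelog', which lets the sink regard upsert DELETE records as INSERT records.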
*/ +public class KafkaOptions { + + private KafkaOptions() { + } + + // -------------------------------------------------------------------------------------------- + // Sink specific options + // -------------------------------------------------------------------------------------------- + public static final ConfigOption KAFKA_IGNORE_ALL_CHANGELOG = + ConfigOptions.key("sink.ignore.changelog") + .booleanType() + .defaultValue(false) + .withDescription("Regard upsert delete as insert kind."); + +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 57f5c69a789..80f2daf77f4 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -17,38 +17,98 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.flink.annotation.Internal; import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.source.DynamicTableSource; import org.apache.flink.table.factories.DynamicTableSinkFactory; import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; +import java.util.HashSet; import java.util.Set; + +@Internal public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + public static final String IDENTIFIER = "kafka-inlong"; + + private static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = + ConfigOptions.key("sink.multiple.partition-pattern") + .stringType() + .noDefaultValue() + .withDescription( + "option 'sink.multiple.partition-pattern' used either when the partitioner is raw-hash, or when passing in designated partition field names for custom field partitions"); + + private static final ConfigOption SINK_FIXED_IDENTIFIER = + ConfigOptions.key("sink.fixed.identifier") + .stringType() + .defaultValue("-1"); + + private static final ConfigOption SINK_SEMANTIC = + ConfigOptions.key("sink.semantic") + .stringType() + .noDefaultValue() + .withDescription("Optional semantic when committing."); + @Override - public DynamicTableSink createDynamicTableSink(Context context) { - return null; + public String factoryIdentifier() { + return IDENTIFIER; } @Override - public DynamicTableSource createDynamicTableSource(Context context) { - return null; + public Set> requiredOptions() { + final Set> options = new HashSet<>(); + options.add(KafkaConnectorOptions.PROPS_BOOTSTRAP_SERVERS); + return options; } @Override - public String factoryIdentifier() { - return null; + public Set> optionalOptions() { + final Set> options = new HashSet<>(); + options.add(FactoryUtil.FORMAT); + options.add(KafkaConnectorOptions.KEY_FORMAT); + options.add(KafkaConnectorOptions.KEY_FIELDS); + options.add(KafkaConnectorOptions.KEY_FIELDS_PREFIX); + options.add(KafkaConnectorOptions.VALUE_FORMAT); + 
options.add(KafkaConnectorOptions.VALUE_FIELDS_INCLUDE); + options.add(KafkaConnectorOptions.TOPIC); + options.add(KafkaConnectorOptions.TOPIC_PATTERN); + options.add(KafkaConnectorOptions.PROPS_GROUP_ID); + options.add(KafkaConnectorOptions.SCAN_STARTUP_MODE); + options.add(KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS); + options.add(KafkaConnectorOptions.SCAN_TOPIC_PARTITION_DISCOVERY); + options.add(KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS); + options.add(KafkaConnectorOptions.SINK_PARTITIONER); + options.add(FactoryUtil.SINK_PARALLELISM); + options.add(Constants.INLONG_METRIC); + options.add(Constants.INLONG_AUDIT); + options.add(Constants.AUDIT_KEYS); + options.add(Constants.SINK_MULTIPLE_FORMAT); + options.add(Constants.PATTERN_PARTITION_MAP); + options.add(Constants.DATASOURCE_PARTITION_MAP); + options.add(Constants.SINK_SCHEMA_CHANGE_ENABLE); + options.add(Constants.SINK_SCHEMA_CHANGE_POLICIES); + options.add(KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG); + options.add(SINK_MULTIPLE_PARTITION_PATTERN); + options.add(SINK_FIXED_IDENTIFIER); + options.add(SINK_SEMANTIC); + return options; } @Override - public Set> requiredOptions() { + public DynamicTableSource createDynamicTableSource(Context context) { return null; } @Override - public Set> optionalOptions() { + public DynamicTableSink createDynamicTableSink(Context context) { return null; } + } From a7f3f969a96d9d63206db663ab53cf429dd38af1 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 8 Aug 2023 15:07:21 +0800 Subject: [PATCH 05/19] Implementations of createKafkaTableSource --- .../apache/inlong/sort/kafka/KafkaSink.java | 22 -- .../apache/inlong/sort/kafka/KafkaSource.java | 21 -- .../table/KafkaConnectorOptionsUtil.java | 356 ++++++++++++++++++ .../kafka/table/KafkaDynamicTableFactory.java | 206 +++++++++- 4 files changed, 555 insertions(+), 50 deletions(-) delete mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java delete mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java deleted file mode 100644 index 371ce554657..00000000000 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSink.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.inlong.sort.kafka; - -public class KafkaSink { - -} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java deleted file mode 100644 index ca742482102..00000000000 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaSource.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.inlong.sort.kafka; - -public class KafkaSource { -} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java new file mode 100644 index 00000000000..150db98b4ad --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.kafka.table; + +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.streaming.connectors.kafka.config.StartupMode; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; +import org.apache.flink.table.api.TableException; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; +import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; +import org.apache.flink.util.Preconditions; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.stream.IntStream; + +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_MODE; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC_PATTERN; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FIELDS_INCLUDE; + +public class KafkaConnectorOptionsUtil { + + public static final String PROPERTIES_PREFIX = "properties."; + + // Other keywords. 
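+    // They are used when parsing 'scan.startup.specific-offsets' values such as "partition:0,offset:42;partition:1,offset:300".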
+ private static final String PARTITION = "partition"; + private static final String OFFSET = "offset"; + + public static void validateTableSourceOptions(ReadableConfig tableOptions) { + validateSourceTopic(tableOptions); + validateScanStartupMode(tableOptions); + } + + public static void validateSourceTopic(ReadableConfig tableOptions) { + Optional> topic = tableOptions.getOptional(TOPIC); + Optional pattern = tableOptions.getOptional(TOPIC_PATTERN); + + if (topic.isPresent() && pattern.isPresent()) { + throw new ValidationException( + "Option 'topic' and 'topic-pattern' shouldn't be set together."); + } + + if (!topic.isPresent() && !pattern.isPresent()) { + throw new ValidationException("Either 'topic' or 'topic-pattern' must be set."); + } + } + + private static void validateScanStartupMode(ReadableConfig tableOptions) { + tableOptions + .getOptional(SCAN_STARTUP_MODE) + .ifPresent( + mode -> { + switch (mode) { + case TIMESTAMP: + if (!tableOptions + .getOptional(SCAN_STARTUP_TIMESTAMP_MILLIS) + .isPresent()) { + throw new ValidationException( + String.format( + "'%s' is required in '%s' startup mode" + + " but missing.", + SCAN_STARTUP_TIMESTAMP_MILLIS.key(), + KafkaConnectorOptions.ScanStartupMode.TIMESTAMP)); + } + + break; + case SPECIFIC_OFFSETS: + if (!tableOptions + .getOptional(SCAN_STARTUP_SPECIFIC_OFFSETS) + .isPresent()) { + throw new ValidationException( + String.format( + "'%s' is required in '%s' startup mode" + + " but missing.", + SCAN_STARTUP_SPECIFIC_OFFSETS.key(), + KafkaConnectorOptions.ScanStartupMode.SPECIFIC_OFFSETS)); + } + if (!isSingleTopic(tableOptions)) { + throw new ValidationException( + "Currently Kafka source only supports specific offset for single topic."); + } + String specificOffsets = + tableOptions.get(SCAN_STARTUP_SPECIFIC_OFFSETS); + parseSpecificOffsets( + specificOffsets, SCAN_STARTUP_SPECIFIC_OFFSETS.key()); + + break; + } + }); + } + + private static boolean isSingleTopic(ReadableConfig tableOptions) { + // Option 'topic-pattern' is regarded as multi-topics. 
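+        // A 'topic-pattern' or a multi-element 'topic' list therefore yields false here.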
+ return tableOptions.getOptional(TOPIC).map(t -> t.size() == 1).orElse(false); + } + + public static Map parseSpecificOffsets( + String specificOffsetsStr, String optionKey) { + final Map offsetMap = new HashMap<>(); + final String[] pairs = specificOffsetsStr.split(";"); + final String validationExceptionMessage = + String.format( + "Invalid properties '%s' should follow the format " + + "'partition:0,offset:42;partition:1,offset:300', but is '%s'.", + optionKey, specificOffsetsStr); + + if (pairs.length == 0) { + throw new ValidationException(validationExceptionMessage); + } + + for (String pair : pairs) { + if (null == pair || pair.length() == 0 || !pair.contains(",")) { + throw new ValidationException(validationExceptionMessage); + } + + final String[] kv = pair.split(","); + if (kv.length != 2 + || !kv[0].startsWith(PARTITION + ':') + || !kv[1].startsWith(OFFSET + ':')) { + throw new ValidationException(validationExceptionMessage); + } + + String partitionValue = kv[0].substring(kv[0].indexOf(":") + 1); + String offsetValue = kv[1].substring(kv[1].indexOf(":") + 1); + try { + final Integer partition = Integer.valueOf(partitionValue); + final Long offset = Long.valueOf(offsetValue); + offsetMap.put(partition, offset); + } catch (NumberFormatException e) { + throw new ValidationException(validationExceptionMessage, e); + } + } + return offsetMap; + } + + public static StartupOptions getStartupOptions(ReadableConfig tableOptions) { + final Map specificOffsets = new HashMap<>(); + final StartupMode startupMode = + tableOptions + .getOptional(SCAN_STARTUP_MODE) + .map(KafkaConnectorOptionsUtil::fromOption) + .orElse(StartupMode.GROUP_OFFSETS); + if (startupMode == StartupMode.SPECIFIC_OFFSETS) { + // It will be refactored after support specific offset for multiple topics in + // FLINK-18602. We have already checked tableOptions.get(TOPIC) contains one topic in + // validateScanStartupMode(). + buildSpecificOffsets(tableOptions, tableOptions.get(TOPIC).get(0), specificOffsets); + } + + final StartupOptions options = new StartupOptions(); + options.startupMode = startupMode; + options.specificOffsets = specificOffsets; + if (startupMode == StartupMode.TIMESTAMP) { + options.startupTimestampMillis = tableOptions.get(SCAN_STARTUP_TIMESTAMP_MILLIS); + } + return options; + } + + private static void buildSpecificOffsets( + ReadableConfig tableOptions, + String topic, + Map specificOffsets) { + String specificOffsetsStrOpt = tableOptions.get(SCAN_STARTUP_SPECIFIC_OFFSETS); + final Map offsetMap = + parseSpecificOffsets(specificOffsetsStrOpt, SCAN_STARTUP_SPECIFIC_OFFSETS.key()); + offsetMap.forEach( + (partition, offset) -> { + final KafkaTopicPartition topicPartition = + new KafkaTopicPartition(topic, partition); + specificOffsets.put(topicPartition, offset); + }); + } + + /** + * Returns the {@link StartupMode} of Kafka Consumer by passed-in table-specific {@link + * KafkaConnectorOptions.ScanStartupMode}. + */ + private static StartupMode fromOption(KafkaConnectorOptions.ScanStartupMode scanStartupMode) { + switch (scanStartupMode) { + case EARLIEST_OFFSET: + return StartupMode.EARLIEST; + case LATEST_OFFSET: + return StartupMode.LATEST; + case GROUP_OFFSETS: + return StartupMode.GROUP_OFFSETS; + case SPECIFIC_OFFSETS: + return StartupMode.SPECIFIC_OFFSETS; + case TIMESTAMP: + return StartupMode.TIMESTAMP; + + default: + throw new TableException( + "Unsupported startup mode. 
Validator should have checked that."); + } + } + + /** + * Creates an array of indices that determine which physical fields of the table schema to + * include in the key format and the order that those fields have in the key format. + * + *
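+     * For example, with 'key.format' = 'json' and 'key.fields' = 'id;name', the returned
+     * projection holds the positions of the physical columns 'id' and 'name', in that order.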
<p>
See {@link KafkaConnectorOptions#KEY_FORMAT}, {@link KafkaConnectorOptions#KEY_FIELDS}, + * and {@link KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. + */ + public static int[] createKeyFormatProjection( + ReadableConfig options, DataType physicalDataType) { + final LogicalType physicalType = physicalDataType.getLogicalType(); + Preconditions.checkArgument( + physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); + final Optional optionalKeyFormat = options.getOptional(KEY_FORMAT); + final Optional> optionalKeyFields = options.getOptional(KEY_FIELDS); + + if (!optionalKeyFormat.isPresent() && optionalKeyFields.isPresent()) { + throw new ValidationException( + String.format( + "The option '%s' can only be declared if a key format is defined using '%s'.", + KEY_FIELDS.key(), KEY_FORMAT.key())); + } else if (optionalKeyFormat.isPresent() + && (!optionalKeyFields.isPresent() || optionalKeyFields.get().size() == 0)) { + throw new ValidationException( + String.format( + "A key format '%s' requires the declaration of one or more of key fields using '%s'.", + KEY_FORMAT.key(), KEY_FIELDS.key())); + } + + if (!optionalKeyFormat.isPresent()) { + return new int[0]; + } + + final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); + + final List keyFields = optionalKeyFields.get(); + final List physicalFields = LogicalTypeChecks.getFieldNames(physicalType); + return keyFields.stream() + .mapToInt( + keyField -> { + final int pos = physicalFields.indexOf(keyField); + // check that field name exists + if (pos < 0) { + throw new ValidationException( + String.format( + "Could not find the field '%s' in the table schema for usage in the key format. " + + "A key field must be a regular, physical column. " + + "The following columns can be selected in the '%s' option:\n" + + "%s", + keyField, KEY_FIELDS.key(), physicalFields)); + } + // check that field name is prefixed correctly + if (!keyField.startsWith(keyPrefix)) { + throw new ValidationException( + String.format( + "All fields in '%s' must be prefixed with '%s' when option '%s' " + + "is set but field '%s' is not prefixed.", + KEY_FIELDS.key(), + keyPrefix, + KEY_FIELDS_PREFIX.key(), + keyField)); + } + return pos; + }) + .toArray(); + } + + /** + * Creates an array of indices that determine which physical fields of the table schema to + * include in the value format. + * + *
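+     * For example, 'value.fields-include' = 'EXCEPT_KEY' drops the projected key fields from
+     * the value format, while the default 'ALL' keeps every physical column.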
<p>
See {@link KafkaConnectorOptions#VALUE_FORMAT}, {@link + * KafkaConnectorOptions#VALUE_FIELDS_INCLUDE}, and {@link + * KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. + */ + public static int[] createValueFormatProjection( + ReadableConfig options, DataType physicalDataType) { + final LogicalType physicalType = physicalDataType.getLogicalType(); + Preconditions.checkArgument( + physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); + final int physicalFieldCount = LogicalTypeChecks.getFieldCount(physicalType); + final IntStream physicalFields = IntStream.range(0, physicalFieldCount); + + final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); + + final KafkaConnectorOptions.ValueFieldsStrategy strategy = options.get(VALUE_FIELDS_INCLUDE); + if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.ALL) { + if (keyPrefix.length() > 0) { + throw new ValidationException( + String.format( + "A key prefix is not allowed when option '%s' is set to '%s'. " + + "Set it to '%s' instead to avoid field overlaps.", + VALUE_FIELDS_INCLUDE.key(), + KafkaConnectorOptions.ValueFieldsStrategy.ALL, + KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY)); + } + return physicalFields.toArray(); + } else if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY) { + final int[] keyProjection = createKeyFormatProjection(options, physicalDataType); + return physicalFields + .filter(pos -> IntStream.of(keyProjection).noneMatch(k -> k == pos)) + .toArray(); + } + throw new TableException("Unknown value fields strategy:" + strategy); + } + + // -------------------------------------------------------------------------------------------- + // Utilities + // -------------------------------------------------------------------------------------------- + + public static List getSourceTopics(ReadableConfig tableOptions) { + return tableOptions.getOptional(TOPIC).orElse(null); + } + + public static Pattern getSourceTopicPattern(ReadableConfig tableOptions) { + return tableOptions.getOptional(TOPIC_PATTERN).map(Pattern::compile).orElse(null); + } + + // -------------------------------------------------------------------------------------------- + // Inner classes + // -------------------------------------------------------------------------------------------- + + /** Kafka startup options. 
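+     * Holds the resolved startup mode together with any specific offsets and the startup timestamp parsed from the table options.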
* */ + public static class StartupOptions { + + public StartupMode startupMode; + public Map specificOffsets; + public long startupTimestampMillis; + } + + private KafkaConnectorOptionsUtil() { + } +} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 80f2daf77f4..0cccc1bcc6e 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -17,21 +17,58 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; + import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.kafka.source.KafkaSourceOptions; +import org.apache.flink.streaming.connectors.kafka.config.StartupMode; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; +import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.ObjectIdentifier; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.format.Format; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DeserializationFormatFactory; import org.apache.flink.table.factories.DynamicTableSinkFactory; import org.apache.flink.table.factories.DynamicTableSourceFactory; import org.apache.flink.table.factories.FactoryUtil; -import org.apache.inlong.sort.base.Constants; -import org.apache.inlong.sort.kafka.KafkaOptions; +import org.apache.flink.table.factories.FactoryUtil.TableFactoryHelper; +import org.apache.flink.table.types.DataType; +import org.apache.flink.types.RowKind; +import javax.annotation.Nullable; + +import java.time.Duration; import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; import java.util.Set; +import java.util.regex.Pattern; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_TOPIC_PARTITION_DISCOVERY; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.StartupOptions; +import static 
org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createKeyFormatProjection; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createValueFormatProjection; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopicPattern; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopics; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getStartupOptions; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.validateTableSourceOptions; @Internal public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { @@ -72,17 +109,17 @@ public Set> requiredOptions() { public Set> optionalOptions() { final Set> options = new HashSet<>(); options.add(FactoryUtil.FORMAT); - options.add(KafkaConnectorOptions.KEY_FORMAT); + options.add(KEY_FORMAT); options.add(KafkaConnectorOptions.KEY_FIELDS); - options.add(KafkaConnectorOptions.KEY_FIELDS_PREFIX); - options.add(KafkaConnectorOptions.VALUE_FORMAT); + options.add(KEY_FIELDS_PREFIX); + options.add(VALUE_FORMAT); options.add(KafkaConnectorOptions.VALUE_FIELDS_INCLUDE); options.add(KafkaConnectorOptions.TOPIC); options.add(KafkaConnectorOptions.TOPIC_PATTERN); options.add(KafkaConnectorOptions.PROPS_GROUP_ID); options.add(KafkaConnectorOptions.SCAN_STARTUP_MODE); options.add(KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS); - options.add(KafkaConnectorOptions.SCAN_TOPIC_PARTITION_DISCOVERY); + options.add(SCAN_TOPIC_PARTITION_DISCOVERY); options.add(KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS); options.add(KafkaConnectorOptions.SINK_PARTITIONER); options.add(FactoryUtil.SINK_PARALLELISM); @@ -103,7 +140,59 @@ public Set> optionalOptions() { @Override public DynamicTableSource createDynamicTableSource(Context context) { - return null; + final TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); + + final Optional>> keyDecodingFormat = + getKeyDecodingFormat(helper); + + final DecodingFormat> valueDecodingFormat = + getValueDecodingFormat(helper); + + helper.validateExcept(PROPERTIES_PREFIX); + + final ReadableConfig tableOptions = helper.getOptions(); + + validateTableSourceOptions(tableOptions); + + validatePKConstraints( + context.getObjectIdentifier(), + context.getPrimaryKeyIndexes(), + context.getCatalogTable().getOptions(), + valueDecodingFormat); + + final StartupOptions startupOptions = getStartupOptions(tableOptions); + + final Properties properties = getKafkaProperties(context.getCatalogTable().getOptions()); + + // add topic-partition discovery + final Optional partitionDiscoveryInterval = + tableOptions.getOptional(SCAN_TOPIC_PARTITION_DISCOVERY).map(Duration::toMillis); + properties.setProperty( + KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS.key(), + partitionDiscoveryInterval.orElse(-1L).toString()); + + final DataType physicalDataType = context.getPhysicalRowDataType(); + + final int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType); + + final int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType); + + final String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null); + + return createKafkaTableSource( + physicalDataType, + keyDecodingFormat.orElse(null), + valueDecodingFormat, + keyProjection, + valueProjection, + keyPrefix, + getSourceTopics(tableOptions), + getSourceTopicPattern(tableOptions), + properties, + startupOptions.startupMode, + 
startupOptions.specificOffsets, + startupOptions.startupTimestampMillis, + context.getObjectIdentifier().asSummaryString()); } @Override @@ -111,4 +200,107 @@ public DynamicTableSink createDynamicTableSink(Context context) { return null; } + private static Optional>> getKeyDecodingFormat( + TableFactoryHelper helper) { + final Optional>> keyDecodingFormat = + helper.discoverOptionalDecodingFormat( + DeserializationFormatFactory.class, KEY_FORMAT); + keyDecodingFormat.ifPresent( + format -> { + if (!format.getChangelogMode().containsOnly(RowKind.INSERT)) { + throw new ValidationException( + String.format( + "A key format should only deal with INSERT-only records. " + + "But %s has a changelog mode of %s.", + helper.getOptions().get(KEY_FORMAT), + format.getChangelogMode())); + } + }); + return keyDecodingFormat; + } + + private static DecodingFormat> getValueDecodingFormat( + TableFactoryHelper helper) { + return helper.discoverOptionalDecodingFormat( + DeserializationFormatFactory.class, FactoryUtil.FORMAT) + .orElseGet( + () -> helper.discoverDecodingFormat( + DeserializationFormatFactory.class, VALUE_FORMAT)); + } + + private static void validatePKConstraints( + ObjectIdentifier tableName, + int[] primaryKeyIndexes, + Map options, + Format format) { + if (primaryKeyIndexes.length > 0 + && format.getChangelogMode().containsOnly(RowKind.INSERT)) { + Configuration configuration = Configuration.fromMap(options); + String formatName = + configuration + .getOptional(FactoryUtil.FORMAT) + .orElse(configuration.get(VALUE_FORMAT)); + throw new ValidationException( + String.format( + "The Kafka table '%s' with '%s' format doesn't support defining PRIMARY KEY constraint" + + " on the table, because it can't guarantee the semantic of primary key.", + tableName.asSummaryString(), formatName)); + } + } + + public static Properties getKafkaProperties(Map tableOptions) { + final Properties kafkaProperties = new Properties(); + + if (hasKafkaClientProperties(tableOptions)) { + tableOptions.keySet().stream() + .filter(key -> key.startsWith(PROPERTIES_PREFIX)) + .forEach( + key -> { + final String value = tableOptions.get(key); + final String subKey = key.substring((PROPERTIES_PREFIX).length()); + kafkaProperties.put(subKey, value); + }); + } + return kafkaProperties; + } + + /** + * Decides if the table options contains Kafka client properties that start with prefix + * 'properties'. 
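+     * For example, 'properties.bootstrap.servers' is such a key; getKafkaProperties strips the 'properties.' prefix before handing it to the Kafka client.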
+ */ + private static boolean hasKafkaClientProperties(Map tableOptions) { + return tableOptions.keySet().stream().anyMatch(k -> k.startsWith(PROPERTIES_PREFIX)); + } + + protected KafkaDynamicSource createKafkaTableSource( + DataType physicalDataType, + @Nullable DecodingFormat> keyDecodingFormat, + DecodingFormat> valueDecodingFormat, + int[] keyProjection, + int[] valueProjection, + @Nullable String keyPrefix, + @Nullable List topics, + @Nullable Pattern topicPattern, + Properties properties, + StartupMode startupMode, + Map specificStartupOffsets, + long startupTimestampMillis, + String tableIdentifier) { + return new KafkaDynamicSource( + physicalDataType, + keyDecodingFormat, + valueDecodingFormat, + keyProjection, + valueProjection, + keyPrefix, + topics, + topicPattern, + properties, + startupMode, + specificStartupOffsets, + startupTimestampMillis, + false, + tableIdentifier); + } + } From b4684d9190ba34e7297990614d2c3d8d77e30541 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 8 Aug 2023 15:29:02 +0800 Subject: [PATCH 06/19] Implementations of createKafkaTableSink --- .../table/KafkaConnectorOptionsUtil.java | 181 ++++++++++++++++++ .../kafka/table/KafkaDynamicTableFactory.java | 136 ++++++++++++- 2 files changed, 316 insertions(+), 1 deletion(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java index 150db98b4ad..50502823f01 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java @@ -17,18 +17,30 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.DeliveryGuarantee; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; import org.apache.flink.table.api.TableException; import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.factories.FactoryUtil; import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.table.types.logical.LogicalTypeRoot; import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; +import org.apache.flink.util.FlinkException; +import org.apache.flink.util.InstantiationUtil; import org.apache.flink.util.Preconditions; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -36,29 +48,51 @@ import java.util.regex.Pattern; import java.util.stream.IntStream; +import static 
org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.DELIVERY_GUARANTEE; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_MODE; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARTITIONER; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC_PATTERN; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TRANSACTIONAL_ID_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FIELDS_INCLUDE; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; +import static org.apache.flink.table.factories.FactoryUtil.FORMAT; public class KafkaConnectorOptionsUtil { + private static final ConfigOption SCHEMA_REGISTRY_SUBJECT = + ConfigOptions.key("schema-registry.subject").stringType().noDefaultValue(); + public static final String PROPERTIES_PREFIX = "properties."; + // Sink partitioner. + public static final String SINK_PARTITIONER_VALUE_DEFAULT = "default"; + public static final String SINK_PARTITIONER_VALUE_FIXED = "fixed"; + public static final String SINK_PARTITIONER_VALUE_ROUND_ROBIN = "round-robin"; + // Other keywords. private static final String PARTITION = "partition"; private static final String OFFSET = "offset"; + protected static final String AVRO_CONFLUENT = "avro-confluent"; + protected static final String DEBEZIUM_AVRO_CONFLUENT = "debezium-avro-confluent"; + private static final List SCHEMA_REGISTRY_FORMATS = + Arrays.asList(AVRO_CONFLUENT, DEBEZIUM_AVRO_CONFLUENT); public static void validateTableSourceOptions(ReadableConfig tableOptions) { validateSourceTopic(tableOptions); validateScanStartupMode(tableOptions); } + public static void validateTableSinkOptions(ReadableConfig tableOptions) { + validateSinkTopic(tableOptions); + validateSinkPartitioner(tableOptions); + } + public static void validateSourceTopic(ReadableConfig tableOptions) { Optional> topic = tableOptions.getOptional(TOPIC); Optional pattern = tableOptions.getOptional(TOPIC_PATTERN); @@ -222,6 +256,15 @@ private static StartupMode fromOption(KafkaConnectorOptions.ScanStartupMode scan } } + static void validateDeliveryGuarantee(ReadableConfig tableOptions) { + if (tableOptions.get(DELIVERY_GUARANTEE) == DeliveryGuarantee.EXACTLY_ONCE + && !tableOptions.getOptional(TRANSACTIONAL_ID_PREFIX).isPresent()) { + throw new ValidationException( + TRANSACTIONAL_ID_PREFIX.key() + + " must be specified when using DeliveryGuarantee.EXACTLY_ONCE."); + } + } + /** * Creates an array of indices that determine which physical fields of the table schema to * include in the key format and the order that those fields have in the key format. 
@@ -339,6 +382,144 @@ public static Pattern getSourceTopicPattern(ReadableConfig tableOptions) { return tableOptions.getOptional(TOPIC_PATTERN).map(Pattern::compile).orElse(null); } + private static void validateSinkPartitioner(ReadableConfig tableOptions) { + tableOptions + .getOptional(SINK_PARTITIONER) + .ifPresent( + partitioner -> { + if (partitioner.equals(SINK_PARTITIONER_VALUE_ROUND_ROBIN) + && tableOptions.getOptional(KEY_FIELDS).isPresent()) { + throw new ValidationException( + "Currently 'round-robin' partitioner only works when option 'key.fields' is not specified."); + } else if (partitioner.isEmpty()) { + throw new ValidationException( + String.format( + "Option '%s' should be a non-empty string.", + SINK_PARTITIONER.key())); + } + }); + } + + /** + * Returns a new table context with a default schema registry subject value in the options if + * the format is a schema registry format (e.g. 'avro-confluent') and the subject is not + * defined. + */ + public static DynamicTableFactory.Context autoCompleteSchemaRegistrySubject( + DynamicTableFactory.Context context) { + Map tableOptions = context.getCatalogTable().getOptions(); + Map newOptions = autoCompleteSchemaRegistrySubject(tableOptions); + if (newOptions.size() > tableOptions.size()) { + // build a new context + return new FactoryUtil.DefaultDynamicTableContext( + context.getObjectIdentifier(), + context.getCatalogTable().copy(newOptions), + context.getEnrichmentOptions(), + context.getConfiguration(), + context.getClassLoader(), + context.isTemporary()); + } else { + return context; + } + } + + private static Map autoCompleteSchemaRegistrySubject( + Map options) { + Configuration configuration = Configuration.fromMap(options); + // the subject autoComplete should only be used in sink, check the topic first + validateSinkTopic(configuration); + final Optional valueFormat = configuration.getOptional(VALUE_FORMAT); + final Optional keyFormat = configuration.getOptional(KEY_FORMAT); + final Optional format = configuration.getOptional(FORMAT); + final String topic = configuration.get(TOPIC).get(0); + + if (format.isPresent() && SCHEMA_REGISTRY_FORMATS.contains(format.get())) { + autoCompleteSubject(configuration, format.get(), topic + "-value"); + } else if (valueFormat.isPresent() && SCHEMA_REGISTRY_FORMATS.contains(valueFormat.get())) { + autoCompleteSubject(configuration, "value." + valueFormat.get(), topic + "-value"); + } + + if (keyFormat.isPresent() && SCHEMA_REGISTRY_FORMATS.contains(keyFormat.get())) { + autoCompleteSubject(configuration, "key." + keyFormat.get(), topic + "-key"); + } + return configuration.toMap(); + } + + public static void validateSinkTopic(ReadableConfig tableOptions) { + String errorMessageTemp = + "Flink Kafka sink currently only supports single topic, but got %s: %s."; + if (!isSingleTopic(tableOptions)) { + if (tableOptions.getOptional(TOPIC_PATTERN).isPresent()) { + throw new ValidationException( + String.format( + errorMessageTemp, + "'topic-pattern'", + tableOptions.get(TOPIC_PATTERN))); + } else { + throw new ValidationException( + String.format(errorMessageTemp, "'topic'", tableOptions.get(TOPIC))); + } + } + } + + private static void autoCompleteSubject( + Configuration configuration, String format, String subject) { + ConfigOption subjectOption = + ConfigOptions.key(format + "." 
+ SCHEMA_REGISTRY_SUBJECT.key()) + .stringType() + .noDefaultValue(); + if (!configuration.getOptional(subjectOption).isPresent()) { + configuration.setString(subjectOption, subject); + } + } + + /** + * The partitioner can be either "fixed", "round-robin" or a customized partitioner full class + * name. + */ + public static Optional> getFlinkKafkaPartitioner( + ReadableConfig tableOptions, ClassLoader classLoader) { + return tableOptions + .getOptional(SINK_PARTITIONER) + .flatMap( + (String partitioner) -> { + switch (partitioner) { + case SINK_PARTITIONER_VALUE_FIXED: + return Optional.of(new FlinkFixedPartitioner<>()); + case SINK_PARTITIONER_VALUE_DEFAULT: + case SINK_PARTITIONER_VALUE_ROUND_ROBIN: + return Optional.empty(); + // Default fallback to full class name of the partitioner. + default: + return Optional.of( + initializePartitioner(partitioner, classLoader)); + } + }); + } + + /** Returns a class value with the given class name. */ + private static FlinkKafkaPartitioner initializePartitioner( + String name, ClassLoader classLoader) { + try { + Class clazz = Class.forName(name, true, classLoader); + if (!FlinkKafkaPartitioner.class.isAssignableFrom(clazz)) { + throw new ValidationException( + String.format( + "Sink partitioner class '%s' should extend from the required class %s", + name, FlinkKafkaPartitioner.class.getName())); + } + @SuppressWarnings("unchecked") + final FlinkKafkaPartitioner kafkaPartitioner = + InstantiationUtil.instantiate(name, FlinkKafkaPartitioner.class, classLoader); + + return kafkaPartitioner; + } catch (ClassNotFoundException | FlinkException e) { + throw new ValidationException( + String.format("Could not find and instantiate partitioner class '%s'", name), + e); + } + } + // -------------------------------------------------------------------------------------------- // Inner classes // -------------------------------------------------------------------------------------------- diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 0cccc1bcc6e..8c242ce1241 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -22,18 +22,24 @@ import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.DeliveryGuarantee; import org.apache.flink.connector.kafka.source.KafkaSourceOptions; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; +import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSink; import 
org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource; +import org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode; import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.format.EncodingFormat; import org.apache.flink.table.connector.format.Format; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.source.DynamicTableSource; @@ -43,8 +49,11 @@ import org.apache.flink.table.factories.DynamicTableSourceFactory; import org.apache.flink.table.factories.FactoryUtil; import org.apache.flink.table.factories.FactoryUtil.TableFactoryHelper; +import org.apache.flink.table.factories.SerializationFormatFactory; import org.apache.flink.table.types.DataType; import org.apache.flink.types.RowKind; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import javax.annotation.Nullable; @@ -57,22 +66,30 @@ import java.util.Set; import java.util.regex.Pattern; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.DELIVERY_GUARANTEE; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_TOPIC_PARTITION_DISCOVERY; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARALLELISM; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TRANSACTIONAL_ID_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.StartupOptions; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.autoCompleteSchemaRegistrySubject; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createKeyFormatProjection; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createValueFormatProjection; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getFlinkKafkaPartitioner; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopicPattern; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopics; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getStartupOptions; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.validateTableSinkOptions; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.validateTableSourceOptions; @Internal public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + private static final Logger LOG = LoggerFactory.getLogger(KafkaDynamicTableFactory.class); public static final String IDENTIFIER = "kafka-inlong"; private static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = @@ -197,7 +214,54 @@ public DynamicTableSource createDynamicTableSource(Context context) { @Override public DynamicTableSink createDynamicTableSink(Context context) { - return null; + final 
TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper( + this, autoCompleteSchemaRegistrySubject(context)); + + final Optional>> keyEncodingFormat = + getKeyEncodingFormat(helper); + + final EncodingFormat> valueEncodingFormat = + getValueEncodingFormat(helper); + + helper.validateExcept(KafkaConnectorOptionsUtil.PROPERTIES_PREFIX); + + final ReadableConfig tableOptions = helper.getOptions(); + + final DeliveryGuarantee deliveryGuarantee = validateDeprecatedSemantic(tableOptions); + validateTableSinkOptions(tableOptions); + + KafkaConnectorOptionsUtil.validateDeliveryGuarantee(tableOptions); + + validatePKConstraints( + context.getObjectIdentifier(), + context.getPrimaryKeyIndexes(), + context.getCatalogTable().getOptions(), + valueEncodingFormat); + + final DataType physicalDataType = context.getPhysicalRowDataType(); + + final int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType); + + final int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType); + + final String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null); + + final Integer parallelism = tableOptions.getOptional(SINK_PARALLELISM).orElse(null); + + return createKafkaTableSink( + physicalDataType, + keyEncodingFormat.orElse(null), + valueEncodingFormat, + keyProjection, + valueProjection, + keyPrefix, + tableOptions.get(TOPIC).get(0), + getKafkaProperties(context.getCatalogTable().getOptions()), + getFlinkKafkaPartitioner(tableOptions, context.getClassLoader()).orElse(null), + deliveryGuarantee, + parallelism, + tableOptions.get(TRANSACTIONAL_ID_PREFIX)); } private static Optional>> getKeyDecodingFormat( @@ -219,6 +283,24 @@ private static Optional>> getKeyDe return keyDecodingFormat; } + private static Optional>> getKeyEncodingFormat( + TableFactoryHelper helper) { + final Optional>> keyEncodingFormat = + helper.discoverOptionalEncodingFormat(SerializationFormatFactory.class, KEY_FORMAT); + keyEncodingFormat.ifPresent( + format -> { + if (!format.getChangelogMode().containsOnly(RowKind.INSERT)) { + throw new ValidationException( + String.format( + "A key format should only deal with INSERT-only records. " + + "But %s has a changelog mode of %s.", + helper.getOptions().get(KEY_FORMAT), + format.getChangelogMode())); + } + }); + return keyEncodingFormat; + } + private static DecodingFormat> getValueDecodingFormat( TableFactoryHelper helper) { return helper.discoverOptionalDecodingFormat( @@ -228,6 +310,15 @@ private static DecodingFormat> getValueDecodingFo DeserializationFormatFactory.class, VALUE_FORMAT)); } + private static EncodingFormat> getValueEncodingFormat( + TableFactoryHelper helper) { + return helper.discoverOptionalEncodingFormat( + SerializationFormatFactory.class, FactoryUtil.FORMAT) + .orElseGet( + () -> helper.discoverEncodingFormat( + SerializationFormatFactory.class, VALUE_FORMAT)); + } + private static void validatePKConstraints( ObjectIdentifier tableName, int[] primaryKeyIndexes, @@ -272,6 +363,18 @@ private static boolean hasKafkaClientProperties(Map tableOptions return tableOptions.keySet().stream().anyMatch(k -> k.startsWith(PROPERTIES_PREFIX)); } + private static DeliveryGuarantee validateDeprecatedSemantic(ReadableConfig tableOptions) { + if (tableOptions.getOptional(SINK_SEMANTIC).isPresent()) { + LOG.warn( + "{} is deprecated and will be removed. 
Please use {} instead.", + SINK_SEMANTIC.key(), + DELIVERY_GUARANTEE.key()); + return DeliveryGuarantee.valueOf( + tableOptions.get(SINK_SEMANTIC).toUpperCase().replace("-", "_")); + } + return tableOptions.get(DELIVERY_GUARANTEE); + } + protected KafkaDynamicSource createKafkaTableSource( DataType physicalDataType, @Nullable DecodingFormat> keyDecodingFormat, @@ -303,4 +406,35 @@ protected KafkaDynamicSource createKafkaTableSource( tableIdentifier); } + protected KafkaDynamicSink createKafkaTableSink( + DataType physicalDataType, + @Nullable EncodingFormat> keyEncodingFormat, + EncodingFormat> valueEncodingFormat, + int[] keyProjection, + int[] valueProjection, + @Nullable String keyPrefix, + String topic, + Properties properties, + FlinkKafkaPartitioner partitioner, + DeliveryGuarantee deliveryGuarantee, + Integer parallelism, + @Nullable String transactionalIdPrefix) { + return new KafkaDynamicSink( + physicalDataType, + physicalDataType, + keyEncodingFormat, + valueEncodingFormat, + keyProjection, + valueProjection, + keyPrefix, + topic, + properties, + partitioner, + deliveryGuarantee, + false, + SinkBufferFlushMode.DISABLED, + parallelism, + transactionalIdPrefix); + } + } From 9026a2344a3a3cffd82918f00e77cfd770aefe20 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 8 Aug 2023 23:55:49 +0800 Subject: [PATCH 07/19] Implementations of UpsertKafkaDynamicTableFactory --- .../sort-connectors/kafka/pom.xml | 2 +- .../table/KafkaConnectorOptionsUtil.java | 25 + .../kafka/table/KafkaDynamicTableFactory.java | 25 +- .../table/UpsertKafkaDynamicTableFactory.java | 496 +++++++++++++++++- 4 files changed, 513 insertions(+), 35 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 33764aefc29..749e1e2b5f8 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -28,7 +28,7 @@ sort-connector-kafka-v1.15 jar - Apache InLong - Sort-connector-kafka + Apache InLong - Sort-connector-kafka-v1.15 ${project.parent.parent.parent.parent.parent.basedir} diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java index 50502823f01..ca980898171 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java @@ -45,6 +45,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Properties; import java.util.regex.Pattern; import java.util.stream.IntStream; @@ -520,6 +521,30 @@ private static FlinkKafkaPartitioner initializePartitioner( } } + public static Properties getKafkaProperties(Map tableOptions) { + final Properties kafkaProperties = new Properties(); + + if (hasKafkaClientProperties(tableOptions)) { + tableOptions.keySet().stream() + .filter(key -> key.startsWith(PROPERTIES_PREFIX)) + .forEach( + key -> { + final String value = tableOptions.get(key); + final String subKey = key.substring((PROPERTIES_PREFIX).length()); + kafkaProperties.put(subKey, value); + }); + } + return 
kafkaProperties; + } + + /** + * Decides if the table options contains Kafka client properties that start with prefix + * 'properties'. + */ + private static boolean hasKafkaClientProperties(Map tableOptions) { + return tableOptions.keySet().stream().anyMatch(k -> k.startsWith(PROPERTIES_PREFIX)); + } + // -------------------------------------------------------------------------------------------- // Inner classes // -------------------------------------------------------------------------------------------- diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 8c242ce1241..dd819420203 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -80,6 +80,7 @@ import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createKeyFormatProjection; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createValueFormatProjection; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getFlinkKafkaPartitioner; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopicPattern; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopics; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getStartupOptions; @@ -339,30 +340,6 @@ private static void validatePKConstraints( } } - public static Properties getKafkaProperties(Map tableOptions) { - final Properties kafkaProperties = new Properties(); - - if (hasKafkaClientProperties(tableOptions)) { - tableOptions.keySet().stream() - .filter(key -> key.startsWith(PROPERTIES_PREFIX)) - .forEach( - key -> { - final String value = tableOptions.get(key); - final String subKey = key.substring((PROPERTIES_PREFIX).length()); - kafkaProperties.put(subKey, value); - }); - } - return kafkaProperties; - } - - /** - * Decides if the table options contains Kafka client properties that start with prefix - * 'properties'. 
- */ - private static boolean hasKafkaClientProperties(Map tableOptions) { - return tableOptions.keySet().stream().anyMatch(k -> k.startsWith(PROPERTIES_PREFIX)); - } - private static DeliveryGuarantee validateDeprecatedSemantic(ReadableConfig tableOptions) { if (tableOptions.getOptional(SINK_SEMANTIC).isPresent()) { LOG.warn( diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index 1d40cba797a..bfae7d489ed 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -17,38 +17,514 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.streaming.connectors.kafka.config.StartupMode; +import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; +import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSink; +import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource; +import org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode; +import org.apache.flink.table.api.TableException; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.format.Format; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DeserializationFormatFactory; import org.apache.flink.table.factories.DynamicTableSinkFactory; import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.SerializationFormatFactory; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; +import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.Preconditions; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Properties; import java.util.Set; +import java.util.stream.IntStream; + +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS; +import static 
org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.PROPS_BOOTSTRAP_SERVERS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_BUFFER_FLUSH_INTERVAL; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_BUFFER_FLUSH_MAX_ROWS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARALLELISM; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARTITIONER; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TRANSACTIONAL_ID_PREFIX; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FIELDS_INCLUDE; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; +import static org.apache.inlong.sort.base.Constants.INLONG_METRIC; +import static org.apache.inlong.sort.kafka.KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.autoCompleteSchemaRegistrySubject; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopicPattern; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopics; public class UpsertKafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { + public static final String IDENTIFIER = "upsert-kafka-inlong"; + + private static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = + ConfigOptions.key("sink.multiple.partition-pattern") + .stringType() + .noDefaultValue() + .withDescription( + "option 'sink.multiple.partition-pattern' used either when the partitioner is raw-hash, or when passing in designated partition field names for custom field partitions"); + + private static final ConfigOption SINK_FIXED_IDENTIFIER = + ConfigOptions.key("sink.fixed.identifier") + .stringType() + .defaultValue("-1"); + @Override - public DynamicTableSink createDynamicTableSink(Context context) { - return null; + public String factoryIdentifier() { + return IDENTIFIER; } @Override - public DynamicTableSource createDynamicTableSource(Context context) { - return null; + public Set> requiredOptions() { + final Set> options = new HashSet<>(); + options.add(PROPS_BOOTSTRAP_SERVERS); + options.add(TOPIC); + options.add(KEY_FORMAT); + options.add(VALUE_FORMAT); + return options; } @Override - public String factoryIdentifier() { - return null; + public Set> optionalOptions() { + final Set> options = new HashSet<>(); + options.add(KEY_FIELDS_PREFIX); + options.add(VALUE_FIELDS_INCLUDE); + options.add(SINK_PARALLELISM); + options.add(SINK_BUFFER_FLUSH_INTERVAL); + options.add(SINK_BUFFER_FLUSH_MAX_ROWS); + options.add(KAFKA_IGNORE_ALL_CHANGELOG); + options.add(INLONG_METRIC); + options.add(SINK_PARTITIONER); + options.add(SINK_MULTIPLE_PARTITION_PATTERN); + options.add(SINK_FIXED_IDENTIFIER); + return options; } @Override - public Set> requiredOptions() { - return null; + public 
DynamicTableSource createDynamicTableSource(Context context) { + FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); + + ReadableConfig tableOptions = helper.getOptions(); + DecodingFormat> keyDecodingFormat = + helper.discoverDecodingFormat(DeserializationFormatFactory.class, KEY_FORMAT); + DecodingFormat> valueDecodingFormat = + helper.discoverDecodingFormat(DeserializationFormatFactory.class, VALUE_FORMAT); + + // Validate the option data type. + helper.validateExcept(PROPERTIES_PREFIX); + validateSource( + tableOptions, + keyDecodingFormat, + valueDecodingFormat, + context.getPrimaryKeyIndexes()); + + Tuple2 keyValueProjections = + createKeyValueProjections(context.getCatalogTable()); + String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null); + Properties properties = getKafkaProperties(context.getCatalogTable().getOptions()); + // always use earliest to keep data integrity + StartupMode earliest = StartupMode.EARLIEST; + + return new KafkaDynamicSource( + context.getPhysicalRowDataType(), + keyDecodingFormat, + new DecodingFormatWrapper(valueDecodingFormat), + keyValueProjections.f0, + keyValueProjections.f1, + keyPrefix, + getSourceTopics(tableOptions), + getSourceTopicPattern(tableOptions), + properties, + earliest, + Collections.emptyMap(), + 0, + true, + context.getObjectIdentifier().asSummaryString()); } @Override - public Set> optionalOptions() { - return null; + public DynamicTableSink createDynamicTableSink(Context context) { + FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper( + this, autoCompleteSchemaRegistrySubject(context)); + + final ReadableConfig tableOptions = helper.getOptions(); + + EncodingFormat> keyEncodingFormat = + helper.discoverEncodingFormat(SerializationFormatFactory.class, KEY_FORMAT); + EncodingFormat> valueEncodingFormat = + helper.discoverEncodingFormat(SerializationFormatFactory.class, VALUE_FORMAT); + + // Validate the option data type. + helper.validateExcept(PROPERTIES_PREFIX); + validateSink( + tableOptions, + keyEncodingFormat, + valueEncodingFormat, + context.getPrimaryKeyIndexes()); + + Tuple2 keyValueProjections = + createKeyValueProjections(context.getCatalogTable()); + final String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null); + final Properties properties = getKafkaProperties(context.getCatalogTable().getOptions()); + + Integer parallelism = tableOptions.get(SINK_PARALLELISM); + + int batchSize = tableOptions.get(SINK_BUFFER_FLUSH_MAX_ROWS); + Duration batchInterval = tableOptions.get(SINK_BUFFER_FLUSH_INTERVAL); + SinkBufferFlushMode flushMode = + new SinkBufferFlushMode(batchSize, batchInterval.toMillis()); + + // use {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. + // it will use hash partition if key is set else in round-robin behaviour. 
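+        // The upsert sink constructed below always uses DeliveryGuarantee.AT_LEAST_ONCE with
+        // upsert mode enabled; buffering of updates is controlled by the SinkBufferFlushMode
+        // assembled above from 'sink.buffer-flush.max-rows' and 'sink.buffer-flush.interval'.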
+ return new KafkaDynamicSink( + context.getPhysicalRowDataType(), + context.getPhysicalRowDataType(), + keyEncodingFormat, + new EncodingFormatWrapper(valueEncodingFormat), + keyValueProjections.f0, + keyValueProjections.f1, + keyPrefix, + tableOptions.get(TOPIC).get(0), + properties, + null, + DeliveryGuarantee.AT_LEAST_ONCE, + true, + flushMode, + parallelism, + tableOptions.get(TRANSACTIONAL_ID_PREFIX)); + } + + private static void validateSource( + ReadableConfig tableOptions, + Format keyFormat, + Format valueFormat, + int[] primaryKeyIndexes) { + validateTopic(tableOptions); + validateFormat(keyFormat, valueFormat, tableOptions); + validatePKConstraints(primaryKeyIndexes); + } + + private static void validateSink( + ReadableConfig tableOptions, + Format keyFormat, + Format valueFormat, + int[] primaryKeyIndexes) { + validateTopic(tableOptions); + validateFormat(keyFormat, valueFormat, tableOptions); + validatePKConstraints(primaryKeyIndexes); + validateSinkBufferFlush(tableOptions); + } + + private static void validateTopic(ReadableConfig tableOptions) { + List topic = tableOptions.get(TOPIC); + if (topic.size() > 1) { + throw new ValidationException( + "The 'upsert-kafka' connector doesn't support topic list now. " + + "Please use single topic as the value of the parameter 'topic'."); + } + } + + private static void validateFormat( + Format keyFormat, Format valueFormat, ReadableConfig tableOptions) { + if (!keyFormat.getChangelogMode().containsOnly(RowKind.INSERT)) { + String identifier = tableOptions.get(KEY_FORMAT); + throw new ValidationException( + String.format( + "'upsert-kafka' connector doesn't support '%s' as key format, " + + "because '%s' is not in insert-only mode.", + identifier, identifier)); + } + if (!valueFormat.getChangelogMode().containsOnly(RowKind.INSERT)) { + String identifier = tableOptions.get(VALUE_FORMAT); + throw new ValidationException( + String.format( + "'upsert-kafka' connector doesn't support '%s' as value format, " + + "because '%s' is not in insert-only mode.", + identifier, identifier)); + } + } + + private static void validatePKConstraints(int[] schema) { + if (schema.length == 0) { + throw new ValidationException( + "'upsert-kafka' tables require to define a PRIMARY KEY constraint. " + + "The PRIMARY KEY specifies which columns should be read from or write to the Kafka message key. 
" + + "The PRIMARY KEY also defines records in the 'upsert-kafka' table should update or delete on which keys."); + } + } + + private static void validateSinkBufferFlush(ReadableConfig tableOptions) { + int flushMaxRows = tableOptions.get(SINK_BUFFER_FLUSH_MAX_ROWS); + long flushIntervalMs = tableOptions.get(SINK_BUFFER_FLUSH_INTERVAL).toMillis(); + if (flushMaxRows > 0 && flushIntervalMs > 0) { + // flush is enabled + return; + } + if (flushMaxRows <= 0 && flushIntervalMs <= 0) { + // flush is disabled + return; + } + // one of them is set which is not allowed + throw new ValidationException( + String.format( + "'%s' and '%s' must be set to be greater than zero together to enable sink buffer flushing.", + SINK_BUFFER_FLUSH_MAX_ROWS.key(), SINK_BUFFER_FLUSH_INTERVAL.key())); + } + + private Tuple2 createKeyValueProjections(ResolvedCatalogTable catalogTable) { + ResolvedSchema schema = catalogTable.getResolvedSchema(); + // primary key should validated earlier + List keyFields = schema.getPrimaryKey().get().getColumns(); + DataType physicalDataType = schema.toPhysicalRowDataType(); + + Configuration tableOptions = Configuration.fromMap(catalogTable.getOptions()); + // upsert-kafka will set key.fields to primary key fields by default + tableOptions.set(KEY_FIELDS, keyFields); + + int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType); + int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType); + + return Tuple2.of(keyProjection, valueProjection); + } + + /** + * Creates an array of indices that determine which physical fields of the table schema to + * include in the key format and the order that those fields have in the key format. + * + *

See {@link KafkaConnectorOptions#KEY_FORMAT}, {@link KafkaConnectorOptions#KEY_FIELDS}, + * and {@link KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. + */ + public static int[] createKeyFormatProjection( + ReadableConfig options, DataType physicalDataType) { + final LogicalType physicalType = physicalDataType.getLogicalType(); + Preconditions.checkArgument( + physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); + final Optional optionalKeyFormat = options.getOptional(KEY_FORMAT); + final Optional> optionalKeyFields = options.getOptional(KEY_FIELDS); + + if (!optionalKeyFormat.isPresent() && optionalKeyFields.isPresent()) { + throw new ValidationException( + String.format( + "The option '%s' can only be declared if a key format is defined using '%s'.", + KEY_FIELDS.key(), KEY_FORMAT.key())); + } else if (optionalKeyFormat.isPresent() + && (!optionalKeyFields.isPresent() || optionalKeyFields.get().size() == 0)) { + throw new ValidationException( + String.format( + "A key format '%s' requires the declaration of one or more of key fields using '%s'.", + KEY_FORMAT.key(), KEY_FIELDS.key())); + } + + if (!optionalKeyFormat.isPresent()) { + return new int[0]; + } + + final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); + + final List keyFields = optionalKeyFields.get(); + final List physicalFields = LogicalTypeChecks.getFieldNames(physicalType); + return keyFields.stream() + .mapToInt( + keyField -> { + final int pos = physicalFields.indexOf(keyField); + // check that field name exists + if (pos < 0) { + throw new ValidationException( + String.format( + "Could not find the field '%s' in the table schema for usage in the key format. " + + "A key field must be a regular, physical column. " + + "The following columns can be selected in the '%s' option:\n" + + "%s", + keyField, KEY_FIELDS.key(), physicalFields)); + } + // check that field name is prefixed correctly + if (!keyField.startsWith(keyPrefix)) { + throw new ValidationException( + String.format( + "All fields in '%s' must be prefixed with '%s' when option '%s' " + + "is set but field '%s' is not prefixed.", + KEY_FIELDS.key(), + keyPrefix, + KEY_FIELDS_PREFIX.key(), + keyField)); + } + return pos; + }) + .toArray(); + } + + /** + * Creates an array of indices that determine which physical fields of the table schema to + * include in the value format. + * + *

See {@link KafkaConnectorOptions#VALUE_FORMAT}, {@link + * KafkaConnectorOptions#VALUE_FIELDS_INCLUDE}, and {@link + * KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. + */ + public static int[] createValueFormatProjection( + ReadableConfig options, DataType physicalDataType) { + final LogicalType physicalType = physicalDataType.getLogicalType(); + Preconditions.checkArgument( + physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); + final int physicalFieldCount = LogicalTypeChecks.getFieldCount(physicalType); + final IntStream physicalFields = IntStream.range(0, physicalFieldCount); + + final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); + + final KafkaConnectorOptions.ValueFieldsStrategy strategy = options.get(VALUE_FIELDS_INCLUDE); + if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.ALL) { + if (keyPrefix.length() > 0) { + throw new ValidationException( + String.format( + "A key prefix is not allowed when option '%s' is set to '%s'. " + + "Set it to '%s' instead to avoid field overlaps.", + VALUE_FIELDS_INCLUDE.key(), + KafkaConnectorOptions.ValueFieldsStrategy.ALL, + KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY)); + } + return physicalFields.toArray(); + } else if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY) { + final int[] keyProjection = createKeyFormatProjection(options, physicalDataType); + return physicalFields + .filter(pos -> IntStream.of(keyProjection).noneMatch(k -> k == pos)) + .toArray(); + } + throw new TableException("Unknown value fields strategy:" + strategy); + } + + /** + * It is used to wrap the decoding format and expose the desired changelog mode. It's only works + * for insert-only format. + */ + protected static class DecodingFormatWrapper + implements + DecodingFormat> { + + private final DecodingFormat> innerDecodingFormat; + + private static final ChangelogMode SOURCE_CHANGELOG_MODE = + ChangelogMode.newBuilder() + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + + public DecodingFormatWrapper( + DecodingFormat> innerDecodingFormat) { + this.innerDecodingFormat = innerDecodingFormat; + } + + @Override + public DeserializationSchema createRuntimeDecoder( + DynamicTableSource.Context context, DataType producedDataType) { + return innerDecodingFormat.createRuntimeDecoder(context, producedDataType); + } + + @Override + public ChangelogMode getChangelogMode() { + return SOURCE_CHANGELOG_MODE; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + DecodingFormatWrapper that = (DecodingFormatWrapper) obj; + return Objects.equals(innerDecodingFormat, that.innerDecodingFormat); + } + + @Override + public int hashCode() { + return Objects.hash(innerDecodingFormat); + } + } + + /** + * It is used to wrap the encoding format and expose the desired changelog mode. It's only works + * for insert-only format. 
+ */ + protected static class EncodingFormatWrapper + implements + EncodingFormat> { + + private final EncodingFormat> innerEncodingFormat; + + public static final ChangelogMode SINK_CHANGELOG_MODE = + ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + + public EncodingFormatWrapper( + EncodingFormat> innerEncodingFormat) { + this.innerEncodingFormat = innerEncodingFormat; + } + + @Override + public SerializationSchema createRuntimeEncoder( + DynamicTableSink.Context context, DataType consumedDataType) { + return innerEncodingFormat.createRuntimeEncoder(context, consumedDataType); + } + + @Override + public ChangelogMode getChangelogMode() { + return SINK_CHANGELOG_MODE; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + EncodingFormatWrapper that = (EncodingFormatWrapper) obj; + return Objects.equals(innerEncodingFormat, that.innerEncodingFormat); + } + + @Override + public int hashCode() { + return Objects.hash(innerEncodingFormat); + } } } From 11b900020f00b5f5597a00ed32176b5edf14e171 Mon Sep 17 00:00:00 2001 From: Hao Date: Thu, 10 Aug 2023 16:19:06 +0800 Subject: [PATCH 08/19] Optimize ConfigOption --- .../inlong/sort/kafka/KafkaOptions.java | 13 +++ .../kafka/table/KafkaDynamicTableFactory.java | 89 +++++++++++-------- .../table/UpsertKafkaDynamicTableFactory.java | 28 ++---- 3 files changed, 73 insertions(+), 57 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java index 69909964925..5622eaa39ee 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java @@ -26,6 +26,19 @@ public class KafkaOptions { private KafkaOptions() { } + public static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = + ConfigOptions.key("sink.multiple.partition-pattern") + .stringType() + .noDefaultValue() + .withDescription( + "option 'sink.multiple.partition-pattern' used either when the partitioner is raw-hash, or when passing in designated partition field names for custom field partitions"); + + public static final ConfigOption SINK_FIXED_IDENTIFIER = + ConfigOptions.key("sink.fixed.identifier") + .stringType() + .defaultValue("-1"); + + // -------------------------------------------------------------------------------------------- // Sink specific options // -------------------------------------------------------------------------------------------- diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index dd819420203..b7006cc0b57 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ 
b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -17,9 +17,6 @@ package org.apache.inlong.sort.kafka.table; -import org.apache.inlong.sort.base.Constants; -import org.apache.inlong.sort.kafka.KafkaOptions; - import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.common.serialization.SerializationSchema; @@ -32,7 +29,6 @@ import org.apache.flink.streaming.connectors.kafka.config.StartupMode; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions; import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSink; import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource; import org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode; @@ -52,11 +48,12 @@ import org.apache.flink.table.factories.SerializationFormatFactory; import org.apache.flink.table.types.DataType; import org.apache.flink.types.RowKind; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; - import java.time.Duration; import java.util.HashSet; import java.util.List; @@ -65,15 +62,27 @@ import java.util.Properties; import java.util.Set; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.DELIVERY_GUARANTEE; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FORMAT; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.PROPS_BOOTSTRAP_SERVERS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.PROPS_GROUP_ID; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_MODE; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SCAN_TOPIC_PARTITION_DISCOVERY; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARALLELISM; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARTITIONER; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC_PATTERN; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TRANSACTIONAL_ID_PREFIX; +import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FIELDS_INCLUDE; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; +import static org.apache.flink.table.factories.FactoryUtil.FORMAT; import 
static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.StartupOptions; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.autoCompleteSchemaRegistrySubject; @@ -91,19 +100,6 @@ public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { private static final Logger LOG = LoggerFactory.getLogger(KafkaDynamicTableFactory.class); - public static final String IDENTIFIER = "kafka-inlong"; - - private static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = - ConfigOptions.key("sink.multiple.partition-pattern") - .stringType() - .noDefaultValue() - .withDescription( - "option 'sink.multiple.partition-pattern' used either when the partitioner is raw-hash, or when passing in designated partition field names for custom field partitions"); - - private static final ConfigOption SINK_FIXED_IDENTIFIER = - ConfigOptions.key("sink.fixed.identifier") - .stringType() - .defaultValue("-1"); private static final ConfigOption SINK_SEMANTIC = ConfigOptions.key("sink.semantic") @@ -111,6 +107,8 @@ public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, Dyna .noDefaultValue() .withDescription("Optional semantic when committing."); + public static final String IDENTIFIER = "kafka-inlong"; + @Override public String factoryIdentifier() { return IDENTIFIER; @@ -119,28 +117,31 @@ public String factoryIdentifier() { @Override public Set> requiredOptions() { final Set> options = new HashSet<>(); - options.add(KafkaConnectorOptions.PROPS_BOOTSTRAP_SERVERS); + options.add(PROPS_BOOTSTRAP_SERVERS); return options; } @Override public Set> optionalOptions() { final Set> options = new HashSet<>(); - options.add(FactoryUtil.FORMAT); + options.add(FORMAT); options.add(KEY_FORMAT); - options.add(KafkaConnectorOptions.KEY_FIELDS); + options.add(KEY_FIELDS); options.add(KEY_FIELDS_PREFIX); options.add(VALUE_FORMAT); - options.add(KafkaConnectorOptions.VALUE_FIELDS_INCLUDE); - options.add(KafkaConnectorOptions.TOPIC); - options.add(KafkaConnectorOptions.TOPIC_PATTERN); - options.add(KafkaConnectorOptions.PROPS_GROUP_ID); - options.add(KafkaConnectorOptions.SCAN_STARTUP_MODE); - options.add(KafkaConnectorOptions.SCAN_STARTUP_SPECIFIC_OFFSETS); + options.add(VALUE_FIELDS_INCLUDE); + options.add(TOPIC); + options.add(TOPIC_PATTERN); + options.add(PROPS_GROUP_ID); + options.add(SCAN_STARTUP_MODE); + options.add(SCAN_STARTUP_SPECIFIC_OFFSETS); options.add(SCAN_TOPIC_PARTITION_DISCOVERY); - options.add(KafkaConnectorOptions.SCAN_STARTUP_TIMESTAMP_MILLIS); - options.add(KafkaConnectorOptions.SINK_PARTITIONER); - options.add(FactoryUtil.SINK_PARALLELISM); + options.add(SCAN_STARTUP_TIMESTAMP_MILLIS); + options.add(SINK_PARTITIONER); + options.add(SINK_PARALLELISM); + options.add(DELIVERY_GUARANTEE); + options.add(TRANSACTIONAL_ID_PREFIX); + options.add(SINK_SEMANTIC); options.add(Constants.INLONG_METRIC); options.add(Constants.INLONG_AUDIT); options.add(Constants.AUDIT_KEYS); @@ -150,12 +151,28 @@ public Set> optionalOptions() { options.add(Constants.SINK_SCHEMA_CHANGE_ENABLE); options.add(Constants.SINK_SCHEMA_CHANGE_POLICIES); options.add(KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG); - options.add(SINK_MULTIPLE_PARTITION_PATTERN); - options.add(SINK_FIXED_IDENTIFIER); - options.add(SINK_SEMANTIC); + options.add(KafkaOptions.SINK_MULTIPLE_PARTITION_PATTERN); + options.add(KafkaOptions.SINK_FIXED_IDENTIFIER); return options; } + @Override + public 
Set> forwardOptions() { + return Stream.of( + PROPS_BOOTSTRAP_SERVERS, + PROPS_GROUP_ID, + TOPIC, + TOPIC_PATTERN, + SCAN_STARTUP_MODE, + SCAN_STARTUP_SPECIFIC_OFFSETS, + SCAN_TOPIC_PARTITION_DISCOVERY, + SCAN_STARTUP_TIMESTAMP_MILLIS, + SINK_PARTITIONER, + SINK_PARALLELISM, + TRANSACTIONAL_ID_PREFIX) + .collect(Collectors.toSet()); + } + @Override public DynamicTableSource createDynamicTableSource(Context context) { final TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); @@ -305,7 +322,7 @@ private static Optional>> getKeyEnco private static DecodingFormat> getValueDecodingFormat( TableFactoryHelper helper) { return helper.discoverOptionalDecodingFormat( - DeserializationFormatFactory.class, FactoryUtil.FORMAT) + DeserializationFormatFactory.class, FORMAT) .orElseGet( () -> helper.discoverDecodingFormat( DeserializationFormatFactory.class, VALUE_FORMAT)); @@ -314,7 +331,7 @@ private static DecodingFormat> getValueDecodingFo private static EncodingFormat> getValueEncodingFormat( TableFactoryHelper helper) { return helper.discoverOptionalEncodingFormat( - SerializationFormatFactory.class, FactoryUtil.FORMAT) + SerializationFormatFactory.class, FORMAT) .orElseGet( () -> helper.discoverEncodingFormat( SerializationFormatFactory.class, VALUE_FORMAT)); @@ -330,7 +347,7 @@ private static void validatePKConstraints( Configuration configuration = Configuration.fromMap(options); String formatName = configuration - .getOptional(FactoryUtil.FORMAT) + .getOptional(FORMAT) .orElse(configuration.get(VALUE_FORMAT)); throw new ValidationException( String.format( diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index bfae7d489ed..a4d59ea85f1 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -21,7 +21,6 @@ import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.connector.base.DeliveryGuarantee; @@ -52,6 +51,8 @@ import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; import org.apache.flink.types.RowKind; import org.apache.flink.util.Preconditions; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; import java.time.Duration; import java.util.Collections; @@ -70,13 +71,10 @@ import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_BUFFER_FLUSH_INTERVAL; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_BUFFER_FLUSH_MAX_ROWS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARALLELISM; -import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.SINK_PARTITIONER; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TOPIC; import static 
org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.TRANSACTIONAL_ID_PREFIX; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FIELDS_INCLUDE; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; -import static org.apache.inlong.sort.base.Constants.INLONG_METRIC; -import static org.apache.inlong.sort.kafka.KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.autoCompleteSchemaRegistrySubject; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties; @@ -87,18 +85,6 @@ public class UpsertKafkaDynamicTableFactory implements DynamicTableSourceFactory public static final String IDENTIFIER = "upsert-kafka-inlong"; - private static final ConfigOption SINK_MULTIPLE_PARTITION_PATTERN = - ConfigOptions.key("sink.multiple.partition-pattern") - .stringType() - .noDefaultValue() - .withDescription( - "option 'sink.multiple.partition-pattern' used either when the partitioner is raw-hash, or when passing in designated partition field names for custom field partitions"); - - private static final ConfigOption SINK_FIXED_IDENTIFIER = - ConfigOptions.key("sink.fixed.identifier") - .stringType() - .defaultValue("-1"); - @Override public String factoryIdentifier() { return IDENTIFIER; @@ -122,11 +108,11 @@ public Set> optionalOptions() { options.add(SINK_PARALLELISM); options.add(SINK_BUFFER_FLUSH_INTERVAL); options.add(SINK_BUFFER_FLUSH_MAX_ROWS); - options.add(KAFKA_IGNORE_ALL_CHANGELOG); - options.add(INLONG_METRIC); - options.add(SINK_PARTITIONER); - options.add(SINK_MULTIPLE_PARTITION_PATTERN); - options.add(SINK_FIXED_IDENTIFIER); + options.add(KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG); + options.add(Constants.INLONG_METRIC); + options.add(KafkaOptions.SINK_MULTIPLE_PARTITION_PATTERN); + options.add(KafkaOptions.SINK_FIXED_IDENTIFIER); + options.add(KafkaConnectorOptions.SINK_PARTITIONER); return options; } From 29280de9008c41bf0ea99d5a93942980e8f2de87 Mon Sep 17 00:00:00 2001 From: Hao Date: Thu, 10 Aug 2023 16:20:44 +0800 Subject: [PATCH 09/19] fix format --- .../inlong/sort/kafka/KafkaOptions.java | 1 - .../kafka/table/KafkaDynamicTableFactory.java | 28 ++++++++++--------- .../table/UpsertKafkaDynamicTableFactory.java | 5 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java index 5622eaa39ee..6962eb6948c 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/KafkaOptions.java @@ -38,7 +38,6 @@ private KafkaOptions() { .stringType() .defaultValue("-1"); - // -------------------------------------------------------------------------------------------- // Sink specific options // -------------------------------------------------------------------------------------------- diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java 
b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index b7006cc0b57..abc1630dbfd 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -17,6 +17,9 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; + import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.common.serialization.SerializationSchema; @@ -48,12 +51,11 @@ import org.apache.flink.table.factories.SerializationFormatFactory; import org.apache.flink.table.types.DataType; import org.apache.flink.types.RowKind; -import org.apache.inlong.sort.base.Constants; -import org.apache.inlong.sort.kafka.KafkaOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; + import java.time.Duration; import java.util.HashSet; import java.util.List; @@ -159,17 +161,17 @@ public Set> optionalOptions() { @Override public Set> forwardOptions() { return Stream.of( - PROPS_BOOTSTRAP_SERVERS, - PROPS_GROUP_ID, - TOPIC, - TOPIC_PATTERN, - SCAN_STARTUP_MODE, - SCAN_STARTUP_SPECIFIC_OFFSETS, - SCAN_TOPIC_PARTITION_DISCOVERY, - SCAN_STARTUP_TIMESTAMP_MILLIS, - SINK_PARTITIONER, - SINK_PARALLELISM, - TRANSACTIONAL_ID_PREFIX) + PROPS_BOOTSTRAP_SERVERS, + PROPS_GROUP_ID, + TOPIC, + TOPIC_PATTERN, + SCAN_STARTUP_MODE, + SCAN_STARTUP_SPECIFIC_OFFSETS, + SCAN_TOPIC_PARTITION_DISCOVERY, + SCAN_STARTUP_TIMESTAMP_MILLIS, + SINK_PARTITIONER, + SINK_PARALLELISM, + TRANSACTIONAL_ID_PREFIX) .collect(Collectors.toSet()); } diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index a4d59ea85f1..6ddfb712e60 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -17,6 +17,9 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.inlong.sort.base.Constants; +import org.apache.inlong.sort.kafka.KafkaOptions; + import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.api.java.tuple.Tuple2; @@ -51,8 +54,6 @@ import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; import org.apache.flink.types.RowKind; import org.apache.flink.util.Preconditions; -import org.apache.inlong.sort.base.Constants; -import org.apache.inlong.sort.kafka.KafkaOptions; import java.time.Duration; import java.util.Collections; From db5644761efccd0a8839ac7dad9ae5bdc31bba4e Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 22 Aug 2023 16:21:24 +0800 Subject: [PATCH 10/19] update LICENSE --- licenses/inlong-sort-connectors/LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/licenses/inlong-sort-connectors/LICENSE 
b/licenses/inlong-sort-connectors/LICENSE index 9388d367717..ec46cd86dbc 100644 --- a/licenses/inlong-sort-connectors/LICENSE +++ b/licenses/inlong-sort-connectors/LICENSE @@ -817,6 +817,7 @@ 1.3.20 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java + inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java Source : org.apache.flink:flink-connector-kafka:1.15.4 (Please note that the software have been modified.) License : https://github.com/apache/flink/blob/master/LICENSE From c0119be41544c171894ce7e68a4ab4488bbc895c Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 23 Aug 2023 12:04:45 +0800 Subject: [PATCH 11/19] Optimize Documentation --- .../table/KafkaConnectorOptionsUtil.java | 124 +++++++++------ .../kafka/table/KafkaDynamicTableFactory.java | 4 + .../table/UpsertKafkaDynamicTableFactory.java | 148 ++++-------------- 3 files changed, 104 insertions(+), 172 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java index ca980898171..0e7a61493b3 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java @@ -17,6 +17,7 @@ package org.apache.inlong.sort.kafka.table; +import org.apache.flink.annotation.Internal; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.Configuration; @@ -64,11 +65,18 @@ import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; import static org.apache.flink.table.factories.FactoryUtil.FORMAT; -public class KafkaConnectorOptionsUtil { +/** Utilities for {@link KafkaConnectorOptions}. */ +@Internal +class KafkaConnectorOptionsUtil { private static final ConfigOption SCHEMA_REGISTRY_SUBJECT = ConfigOptions.key("schema-registry.subject").stringType().noDefaultValue(); + // -------------------------------------------------------------------------------------------- + // Option enumerations + // -------------------------------------------------------------------------------------------- + + // Prefix for Kafka specific properties. public static final String PROPERTIES_PREFIX = "properties."; // Sink partitioner. 
@@ -84,6 +92,10 @@ public class KafkaConnectorOptionsUtil { private static final List SCHEMA_REGISTRY_FORMATS = Arrays.asList(AVRO_CONFLUENT, DEBEZIUM_AVRO_CONFLUENT); + // -------------------------------------------------------------------------------------------- + // Validation + // -------------------------------------------------------------------------------------------- + public static void validateTableSourceOptions(ReadableConfig tableOptions) { validateSourceTopic(tableOptions); validateScanStartupMode(tableOptions); @@ -108,6 +120,23 @@ public static void validateSourceTopic(ReadableConfig tableOptions) { } } + public static void validateSinkTopic(ReadableConfig tableOptions) { + String errorMessageTemp = + "Flink Kafka sink currently only supports single topic, but got %s: %s."; + if (!isSingleTopic(tableOptions)) { + if (tableOptions.getOptional(TOPIC_PATTERN).isPresent()) { + throw new ValidationException( + String.format( + errorMessageTemp, + "'topic-pattern'", + tableOptions.get(TOPIC_PATTERN))); + } else { + throw new ValidationException( + String.format(errorMessageTemp, "'topic'", tableOptions.get(TOPIC))); + } + } + } + private static void validateScanStartupMode(ReadableConfig tableOptions) { tableOptions .getOptional(SCAN_STARTUP_MODE) @@ -152,11 +181,52 @@ private static void validateScanStartupMode(ReadableConfig tableOptions) { }); } + private static void validateSinkPartitioner(ReadableConfig tableOptions) { + tableOptions + .getOptional(SINK_PARTITIONER) + .ifPresent( + partitioner -> { + if (partitioner.equals(SINK_PARTITIONER_VALUE_ROUND_ROBIN) + && tableOptions.getOptional(KEY_FIELDS).isPresent()) { + throw new ValidationException( + "Currently 'round-robin' partitioner only works when option 'key.fields' is not specified."); + } else if (partitioner.isEmpty()) { + throw new ValidationException( + String.format( + "Option '%s' should be a non-empty string.", + SINK_PARTITIONER.key())); + } + }); + } + + // -------------------------------------------------------------------------------------------- + // Utilities + // -------------------------------------------------------------------------------------------- + + public static List getSourceTopics(ReadableConfig tableOptions) { + return tableOptions.getOptional(TOPIC).orElse(null); + } + + public static Pattern getSourceTopicPattern(ReadableConfig tableOptions) { + return tableOptions.getOptional(TOPIC_PATTERN).map(Pattern::compile).orElse(null); + } + private static boolean isSingleTopic(ReadableConfig tableOptions) { // Option 'topic-pattern' is regarded as multi-topics. return tableOptions.getOptional(TOPIC).map(t -> t.size() == 1).orElse(false); } + /** + * Parses SpecificOffsets String to Map. + * + *

+     * <p>SpecificOffsets String format was given as following:
+     *
+     * <pre>
+     *     scan.startup.specific-offsets = partition:0,offset:42;partition:1,offset:300
+ * + * @return SpecificOffsets with Map format, key is partition, and value is offset + */ public static Map parseSpecificOffsets( String specificOffsetsStr, String optionKey) { final Map offsetMap = new HashMap<>(); @@ -371,36 +441,6 @@ public static int[] createValueFormatProjection( throw new TableException("Unknown value fields strategy:" + strategy); } - // -------------------------------------------------------------------------------------------- - // Utilities - // -------------------------------------------------------------------------------------------- - - public static List getSourceTopics(ReadableConfig tableOptions) { - return tableOptions.getOptional(TOPIC).orElse(null); - } - - public static Pattern getSourceTopicPattern(ReadableConfig tableOptions) { - return tableOptions.getOptional(TOPIC_PATTERN).map(Pattern::compile).orElse(null); - } - - private static void validateSinkPartitioner(ReadableConfig tableOptions) { - tableOptions - .getOptional(SINK_PARTITIONER) - .ifPresent( - partitioner -> { - if (partitioner.equals(SINK_PARTITIONER_VALUE_ROUND_ROBIN) - && tableOptions.getOptional(KEY_FIELDS).isPresent()) { - throw new ValidationException( - "Currently 'round-robin' partitioner only works when option 'key.fields' is not specified."); - } else if (partitioner.isEmpty()) { - throw new ValidationException( - String.format( - "Option '%s' should be a non-empty string.", - SINK_PARTITIONER.key())); - } - }); - } - /** * Returns a new table context with a default schema registry subject value in the options if * the format is a schema registry format (e.g. 'avro-confluent') and the subject is not @@ -446,23 +486,6 @@ private static Map autoCompleteSchemaRegistrySubject( return configuration.toMap(); } - public static void validateSinkTopic(ReadableConfig tableOptions) { - String errorMessageTemp = - "Flink Kafka sink currently only supports single topic, but got %s: %s."; - if (!isSingleTopic(tableOptions)) { - if (tableOptions.getOptional(TOPIC_PATTERN).isPresent()) { - throw new ValidationException( - String.format( - errorMessageTemp, - "'topic-pattern'", - tableOptions.get(TOPIC_PATTERN))); - } else { - throw new ValidationException( - String.format(errorMessageTemp, "'topic'", tableOptions.get(TOPIC))); - } - } - } - private static void autoCompleteSubject( Configuration configuration, String format, String subject) { ConfigOption subjectOption = @@ -549,7 +572,7 @@ private static boolean hasKafkaClientProperties(Map tableOptions // Inner classes // -------------------------------------------------------------------------------------------- - /** Kafka startup options. * */ + /** Kafka startup options. 
*/ public static class StartupOptions { public StartupMode startupMode; @@ -557,6 +580,5 @@ public static class StartupOptions { public long startupTimestampMillis; } - private KafkaConnectorOptionsUtil() { - } + private KafkaConnectorOptionsUtil() {} } diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index abc1630dbfd..5aaae741dd3 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -98,6 +98,10 @@ import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.validateTableSinkOptions; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.validateTableSourceOptions; +/** + * Factory for creating configured instances of {@link KafkaDynamicSource} and {@link + * KafkaDynamicSink}. + */ @Internal public class KafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index 6ddfb712e60..efb9f214dc7 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -78,10 +78,13 @@ import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.VALUE_FORMAT; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.PROPERTIES_PREFIX; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.autoCompleteSchemaRegistrySubject; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createKeyFormatProjection; +import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.createValueFormatProjection; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopicPattern; import static org.apache.inlong.sort.kafka.table.KafkaConnectorOptionsUtil.getSourceTopics; +/** Upsert-Kafka factory. 
*/ public class UpsertKafkaDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { public static final String IDENTIFIER = "upsert-kafka-inlong"; @@ -109,8 +112,8 @@ public Set> optionalOptions() { options.add(SINK_PARALLELISM); options.add(SINK_BUFFER_FLUSH_INTERVAL); options.add(SINK_BUFFER_FLUSH_MAX_ROWS); - options.add(KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG); options.add(Constants.INLONG_METRIC); + options.add(KafkaOptions.KAFKA_IGNORE_ALL_CHANGELOG); options.add(KafkaOptions.SINK_MULTIPLE_PARTITION_PATTERN); options.add(KafkaOptions.SINK_FIXED_IDENTIFIER); options.add(KafkaConnectorOptions.SINK_PARTITIONER); @@ -212,6 +215,26 @@ public DynamicTableSink createDynamicTableSink(Context context) { tableOptions.get(TRANSACTIONAL_ID_PREFIX)); } + private Tuple2 createKeyValueProjections(ResolvedCatalogTable catalogTable) { + ResolvedSchema schema = catalogTable.getResolvedSchema(); + // primary key should validated earlier + List keyFields = schema.getPrimaryKey().get().getColumns(); + DataType physicalDataType = schema.toPhysicalRowDataType(); + + Configuration tableOptions = Configuration.fromMap(catalogTable.getOptions()); + // upsert-kafka will set key.fields to primary key fields by default + tableOptions.set(KEY_FIELDS, keyFields); + + int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType); + int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType); + + return Tuple2.of(keyProjection, valueProjection); + } + + // -------------------------------------------------------------------------------------------- + // Validation + // -------------------------------------------------------------------------------------------- + private static void validateSource( ReadableConfig tableOptions, Format keyFormat, @@ -289,126 +312,9 @@ private static void validateSinkBufferFlush(ReadableConfig tableOptions) { SINK_BUFFER_FLUSH_MAX_ROWS.key(), SINK_BUFFER_FLUSH_INTERVAL.key())); } - private Tuple2 createKeyValueProjections(ResolvedCatalogTable catalogTable) { - ResolvedSchema schema = catalogTable.getResolvedSchema(); - // primary key should validated earlier - List keyFields = schema.getPrimaryKey().get().getColumns(); - DataType physicalDataType = schema.toPhysicalRowDataType(); - - Configuration tableOptions = Configuration.fromMap(catalogTable.getOptions()); - // upsert-kafka will set key.fields to primary key fields by default - tableOptions.set(KEY_FIELDS, keyFields); - - int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType); - int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType); - - return Tuple2.of(keyProjection, valueProjection); - } - - /** - * Creates an array of indices that determine which physical fields of the table schema to - * include in the key format and the order that those fields have in the key format. - * - *

See {@link KafkaConnectorOptions#KEY_FORMAT}, {@link KafkaConnectorOptions#KEY_FIELDS}, - * and {@link KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. - */ - public static int[] createKeyFormatProjection( - ReadableConfig options, DataType physicalDataType) { - final LogicalType physicalType = physicalDataType.getLogicalType(); - Preconditions.checkArgument( - physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); - final Optional optionalKeyFormat = options.getOptional(KEY_FORMAT); - final Optional> optionalKeyFields = options.getOptional(KEY_FIELDS); - - if (!optionalKeyFormat.isPresent() && optionalKeyFields.isPresent()) { - throw new ValidationException( - String.format( - "The option '%s' can only be declared if a key format is defined using '%s'.", - KEY_FIELDS.key(), KEY_FORMAT.key())); - } else if (optionalKeyFormat.isPresent() - && (!optionalKeyFields.isPresent() || optionalKeyFields.get().size() == 0)) { - throw new ValidationException( - String.format( - "A key format '%s' requires the declaration of one or more of key fields using '%s'.", - KEY_FORMAT.key(), KEY_FIELDS.key())); - } - - if (!optionalKeyFormat.isPresent()) { - return new int[0]; - } - - final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); - - final List keyFields = optionalKeyFields.get(); - final List physicalFields = LogicalTypeChecks.getFieldNames(physicalType); - return keyFields.stream() - .mapToInt( - keyField -> { - final int pos = physicalFields.indexOf(keyField); - // check that field name exists - if (pos < 0) { - throw new ValidationException( - String.format( - "Could not find the field '%s' in the table schema for usage in the key format. " - + "A key field must be a regular, physical column. " - + "The following columns can be selected in the '%s' option:\n" - + "%s", - keyField, KEY_FIELDS.key(), physicalFields)); - } - // check that field name is prefixed correctly - if (!keyField.startsWith(keyPrefix)) { - throw new ValidationException( - String.format( - "All fields in '%s' must be prefixed with '%s' when option '%s' " - + "is set but field '%s' is not prefixed.", - KEY_FIELDS.key(), - keyPrefix, - KEY_FIELDS_PREFIX.key(), - keyField)); - } - return pos; - }) - .toArray(); - } - - /** - * Creates an array of indices that determine which physical fields of the table schema to - * include in the value format. - * - *

See {@link KafkaConnectorOptions#VALUE_FORMAT}, {@link - * KafkaConnectorOptions#VALUE_FIELDS_INCLUDE}, and {@link - * KafkaConnectorOptions#KEY_FIELDS_PREFIX} for more information. - */ - public static int[] createValueFormatProjection( - ReadableConfig options, DataType physicalDataType) { - final LogicalType physicalType = physicalDataType.getLogicalType(); - Preconditions.checkArgument( - physicalType.is(LogicalTypeRoot.ROW), "Row data type expected."); - final int physicalFieldCount = LogicalTypeChecks.getFieldCount(physicalType); - final IntStream physicalFields = IntStream.range(0, physicalFieldCount); - - final String keyPrefix = options.getOptional(KEY_FIELDS_PREFIX).orElse(""); - - final KafkaConnectorOptions.ValueFieldsStrategy strategy = options.get(VALUE_FIELDS_INCLUDE); - if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.ALL) { - if (keyPrefix.length() > 0) { - throw new ValidationException( - String.format( - "A key prefix is not allowed when option '%s' is set to '%s'. " - + "Set it to '%s' instead to avoid field overlaps.", - VALUE_FIELDS_INCLUDE.key(), - KafkaConnectorOptions.ValueFieldsStrategy.ALL, - KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY)); - } - return physicalFields.toArray(); - } else if (strategy == KafkaConnectorOptions.ValueFieldsStrategy.EXCEPT_KEY) { - final int[] keyProjection = createKeyFormatProjection(options, physicalDataType); - return physicalFields - .filter(pos -> IntStream.of(keyProjection).noneMatch(k -> k == pos)) - .toArray(); - } - throw new TableException("Unknown value fields strategy:" + strategy); - } + // -------------------------------------------------------------------------------------------- + // Format wrapper + // -------------------------------------------------------------------------------------------- /** * It is used to wrap the decoding format and expose the desired changelog mode. 
It's only works From 2872c50b8d6c48150719277e49cbc8f7cb67a18d Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 23 Aug 2023 12:15:11 +0800 Subject: [PATCH 12/19] format code style --- .../inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java | 3 ++- .../sort/kafka/table/UpsertKafkaDynamicTableFactory.java | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java index 0e7a61493b3..06cf40ea49a 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaConnectorOptionsUtil.java @@ -580,5 +580,6 @@ public static class StartupOptions { public long startupTimestampMillis; } - private KafkaConnectorOptionsUtil() {} + private KafkaConnectorOptionsUtil() { + } } diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index efb9f214dc7..45af2b0c94c 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -32,7 +32,6 @@ import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSink; import org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource; import org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode; -import org.apache.flink.table.api.TableException; import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.catalog.ResolvedSchema; @@ -49,21 +48,15 @@ import org.apache.flink.table.factories.FactoryUtil; import org.apache.flink.table.factories.SerializationFormatFactory; import org.apache.flink.table.types.DataType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.LogicalTypeRoot; -import org.apache.flink.table.types.logical.utils.LogicalTypeChecks; import org.apache.flink.types.RowKind; -import org.apache.flink.util.Preconditions; import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Objects; -import java.util.Optional; import java.util.Properties; import java.util.Set; -import java.util.stream.IntStream; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS; import static org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.KEY_FIELDS_PREFIX; From 596725754d159f1586b8e3f1587c753242095d3e Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 12 Sep 2023 23:48:27 +0800 Subject: [PATCH 13/19] update --- .../sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml 
b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 749e1e2b5f8..3d4936a1c1b 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -77,6 +77,7 @@ org.apache.kafka:* com.google.protobuf:* org.apache.flink:flink-connector-kafka + org.apache.flink:flink-connector-base com.amazonaws:* com.fasterxml.jackson.core:* commons-logging:commons-logging From 7cc226449462d8ad57f1ae9f1d5e5ce3cfb732b5 Mon Sep 17 00:00:00 2001 From: Hao Date: Thu, 14 Sep 2023 20:33:59 +0800 Subject: [PATCH 14/19] modify Validates the options of the factory --- .../inlong/sort/kafka/table/KafkaDynamicTableFactory.java | 6 ++++-- .../sort/kafka/table/UpsertKafkaDynamicTableFactory.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java index 5aaae741dd3..c87735ffc87 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/KafkaDynamicTableFactory.java @@ -189,10 +189,12 @@ public DynamicTableSource createDynamicTableSource(Context context) { final DecodingFormat> valueDecodingFormat = getValueDecodingFormat(helper); - helper.validateExcept(PROPERTIES_PREFIX); - final ReadableConfig tableOptions = helper.getOptions(); + final String valueFormatPrefix = tableOptions.getOptional(FORMAT).orElse(tableOptions.get(VALUE_FORMAT)); + LOG.info("valueFormatPrefix is {}", valueFormatPrefix); + helper.validateExcept(PROPERTIES_PREFIX, Constants.DIRTY_PREFIX, valueFormatPrefix); + validateTableSourceOptions(tableOptions); validatePKConstraints( diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java index 45af2b0c94c..2f61265c32e 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/src/main/java/org/apache/inlong/sort/kafka/table/UpsertKafkaDynamicTableFactory.java @@ -124,7 +124,7 @@ public DynamicTableSource createDynamicTableSource(Context context) { helper.discoverDecodingFormat(DeserializationFormatFactory.class, VALUE_FORMAT); // Validate the option data type. 
- helper.validateExcept(PROPERTIES_PREFIX); + helper.validateExcept(PROPERTIES_PREFIX, Constants.DIRTY_PREFIX); validateSource( tableOptions, keyDecodingFormat, From d71631b815bd8736bc8e9727919a6fd551a4378c Mon Sep 17 00:00:00 2001 From: Hao Date: Sat, 7 Oct 2023 21:51:34 +0800 Subject: [PATCH 15/19] update pom version --- .../sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 3d4936a1c1b..78181b233f5 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -23,7 +23,7 @@ org.apache.inlong sort-connectors-v1.15 - 1.9.0-SNAPSHOT + 1.10.0-SNAPSHOT sort-connector-kafka-v1.15 From 1bb399956914dbde69290d274bf35a9ecfbbdf4d Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 6 Dec 2023 14:46:01 +0800 Subject: [PATCH 16/19] Added end-to-end testing in sort-connector-kafka-v1.15 --- inlong-sort/sort-core/pom.xml | 6 + .../sort-end-to-end-tests-v1.15/pom.xml | 14 +- .../inlong/sort/tests/KafkaE2EITCase.java | 230 ++++++++++++++++++ .../inlong/sort/tests/MysqlToRocksITCase.java | 13 +- .../resources/env/kafka_test_kafka_init.txt | 1 + .../test/resources/flinkSql/kafka_test.sql | 61 +++++ .../sort-connectors/kafka/pom.xml | 61 +++-- pom.xml | 20 +- 8 files changed, 371 insertions(+), 35 deletions(-) create mode 100644 inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java create mode 100644 inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/env/kafka_test_kafka_init.txt create mode 100644 inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql diff --git a/inlong-sort/sort-core/pom.xml b/inlong-sort/sort-core/pom.xml index a18d1773153..e8974e29f03 100644 --- a/inlong-sort/sort-core/pom.xml +++ b/inlong-sort/sort-core/pom.xml @@ -299,6 +299,12 @@ ${project.version} test + + org.apache.inlong + sort-connector-kafka-v1.15 + ${project.version} + test + diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/pom.xml b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/pom.xml index 07d8663682c..abdb6e6500b 100644 --- a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/pom.xml +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/pom.xml @@ -46,12 +46,14 @@ org.testcontainers postgresql - ${testcontainers.version} + + + org.testcontainers + kafka org.testcontainers mongodb - ${testcontainers.version} org.mongodb @@ -223,6 +225,14 @@ jar ${project.build.directory}/dependencies + + org.apache.inlong + sort-connector-kafka-v1.15 + ${project.version} + sort-connector-kafka.jar + jar + ${project.build.directory}/dependencies + diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java new file mode 100644 index 00000000000..f76a3530786 --- /dev/null +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.tests; + +import org.apache.inlong.sort.tests.utils.FlinkContainerTestEnv; +import org.apache.inlong.sort.tests.utils.JdbcProxy; +import org.apache.inlong.sort.tests.utils.MySqlContainer; +import org.apache.inlong.sort.tests.utils.PlaceholderResolver; +import org.apache.inlong.sort.tests.utils.StarRocksContainer; +import org.apache.inlong.sort.tests.utils.TestUtils; + +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.Container.ExecResult; +import org.testcontainers.containers.KafkaContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.utility.DockerImageName; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.Duration; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.apache.inlong.sort.tests.utils.StarRocksManager.INTER_CONTAINER_STAR_ROCKS_ALIAS; +import static org.apache.inlong.sort.tests.utils.StarRocksManager.STAR_ROCKS_LOG; +import static org.apache.inlong.sort.tests.utils.StarRocksManager.buildStarRocksImage; +import static org.apache.inlong.sort.tests.utils.StarRocksManager.getNewStarRocksImageName; +import static org.apache.inlong.sort.tests.utils.StarRocksManager.initializeStarRocksTable; + +/** + * End-to-end tests for sort-connector-kafka uber jar. 
+ */ +public class KafkaE2EITCase extends FlinkContainerTestEnv { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaE2EITCase.class); + + public static final Logger MYSQL_LOG = LoggerFactory.getLogger(MySqlContainer.class); + + public static final Logger KAFKA_LOG = LoggerFactory.getLogger(KafkaContainer.class); + + private static final Path kafkaJar = TestUtils.getResource("sort-connector-kafka.jar"); + private static final Path mysqlJar = TestUtils.getResource("sort-connector-mysql-cdc.jar"); + private static final Path starrocksJar = TestUtils.getResource("sort-connector-starrocks.jar"); + private static final Path mysqlJdbcJar = TestUtils.getResource("mysql-driver.jar"); + + private static final String sqlFile; + + static { + try { + URI kafkaSqlFile = + Objects.requireNonNull(KafkaE2EITCase.class.getResource("/flinkSql/kafka_test.sql")).toURI(); + sqlFile = Paths.get(kafkaSqlFile).toString(); + buildStarRocksImage(); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @ClassRule + public static final KafkaContainer KAFKA = + new KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:6.2.1")) + .withNetwork(NETWORK) + .withNetworkAliases("kafka") + .withEmbeddedZookeeper() + .withLogConsumer(new Slf4jLogConsumer(KAFKA_LOG)); + + @ClassRule + public static StarRocksContainer STAR_ROCKS = + (StarRocksContainer) new StarRocksContainer(getNewStarRocksImageName()) + .withExposedPorts(9030, 8030, 8040) + .withNetwork(NETWORK) + .withAccessToHost(true) + .withNetworkAliases(INTER_CONTAINER_STAR_ROCKS_ALIAS) + .withLogConsumer(new Slf4jLogConsumer(STAR_ROCKS_LOG)); + + @ClassRule + public static final MySqlContainer MYSQL_CONTAINER = + (MySqlContainer) new MySqlContainer(MySqlContainer.MySqlVersion.V8_0) + .withDatabaseName("test") + .withNetwork(NETWORK) + .withNetworkAliases("mysql") + .withLogConsumer(new Slf4jLogConsumer(MYSQL_LOG)); + + @Before + public void setup() { + waitUntilJobRunning(Duration.ofSeconds(30)); + initializeMysqlTable(); + initializeStarRocksTable(STAR_ROCKS); + } + + private void initializeMysqlTable() { + try { + Class.forName(MYSQL_CONTAINER.getDriverClassName()); + Connection conn = DriverManager + .getConnection(MYSQL_CONTAINER.getJdbcUrl(), MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword()); + Statement stat = conn.createStatement(); + stat.execute( + "CREATE TABLE test_input (\n" + + " id SERIAL,\n" + + " name VARCHAR(255) NOT NULL DEFAULT 'flink',\n" + + " description VARCHAR(512),\n" + + " PRIMARY KEY(id)\n" + + ");"); + stat.close(); + conn.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @AfterClass + public static void teardown() { + if (KAFKA != null) { + KAFKA.stop(); + } + + if (MYSQL_CONTAINER != null) { + MYSQL_CONTAINER.stop(); + } + + if (STAR_ROCKS != null) { + STAR_ROCKS.stop(); + } + } + + private void initializeKafkaTable(String topic) { + String fileName = "kafka_test_kafka_init.txt"; + int port = KafkaContainer.ZOOKEEPER_PORT; + + Map properties = new HashMap<>(); + properties.put("TOPIC", topic); + properties.put("ZOOKEEPER_PORT", port); + + try { + String createKafkaStatement = getCreateStatement(fileName, properties); + ExecResult result = KAFKA.execInContainer("bash", "-c", createKafkaStatement); + LOG.info("Create kafka topic: {}, std: {}", createKafkaStatement, result.getStdout()); + if (result.getExitCode() != 0) { + throw new RuntimeException("Init kafka topic failed. 
Exit code:" + result.getExitCode()); + } + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + } + + private String getCreateStatement(String fileName, Map properties) { + URL url = Objects.requireNonNull(KafkaE2EITCase.class.getResource("/env/" + fileName)); + + try { + Path file = Paths.get(url.toURI()); + return PlaceholderResolver.getDefaultResolver().resolveByMap( + new String(Files.readAllBytes(file), StandardCharsets.UTF_8), + properties); + } catch (IOException | URISyntaxException e) { + throw new RuntimeException(e); + } + } + + /** + * Test flink sql mysql cdc to hive + * + * @throws Exception The exception may throw when execute the case + */ + @Test + public void testKafkaWithSqlFile() throws Exception { + final String topic = "test-topic"; + initializeKafkaTable(topic); + + submitSQLJob(sqlFile, kafkaJar, starrocksJar, mysqlJar, mysqlJdbcJar); + waitUntilJobRunning(Duration.ofSeconds(10)); + + // generate input + try (Connection conn = DriverManager.getConnection(MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), MYSQL_CONTAINER.getPassword()); + Statement stat = conn.createStatement()) { + stat.execute("INSERT INTO test_input VALUES (1,'jacket','water resistant white wind breaker');"); + stat.execute("INSERT INTO test_input VALUES (2,'scooter','Big 2-wheel scooter ');"); + } catch (SQLException e) { + LOG.error("Update table for CDC failed.", e); + throw e; + } + + JdbcProxy proxy = new JdbcProxy(STAR_ROCKS.getJdbcUrl(), STAR_ROCKS.getUsername(), + STAR_ROCKS.getPassword(), + STAR_ROCKS.getDriverClassName()); + + List expectResult = Arrays.asList( + "1,jacket,water resistant white wind breaker", + "2,scooter,Big 2-wheel scooter "); + proxy.checkResultWithTimeout( + expectResult, + "test_output1", + 3, + 60000L); + } +} \ No newline at end of file diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/MysqlToRocksITCase.java b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/MysqlToRocksITCase.java index bbeccd04a5a..51501772a9e 100644 --- a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/MysqlToRocksITCase.java +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/MysqlToRocksITCase.java @@ -39,7 +39,7 @@ import java.sql.SQLException; import java.sql.Statement; import java.time.Duration; -import java.util.Arrays; +import java.util.Collections; import java.util.List; import static org.apache.inlong.sort.tests.utils.StarRocksManager.INTER_CONTAINER_STAR_ROCKS_ALIAS; @@ -155,12 +155,11 @@ public void testMysqlUpdateAndDelete() throws Exception { throw e; } - JdbcProxy proxy = - new JdbcProxy(STAR_ROCKS.getJdbcUrl(), STAR_ROCKS.getUsername(), - STAR_ROCKS.getPassword(), - STAR_ROCKS.getDriverClassName()); - List expectResult = - Arrays.asList("2,tom,Big 2-wheel scooter "); + JdbcProxy proxy = new JdbcProxy(STAR_ROCKS.getJdbcUrl(), STAR_ROCKS.getUsername(), + STAR_ROCKS.getPassword(), + STAR_ROCKS.getDriverClassName()); + + List expectResult = Collections.singletonList("2,tom,Big 2-wheel scooter "); proxy.checkResultWithTimeout( expectResult, "test_output1", diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/env/kafka_test_kafka_init.txt b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/env/kafka_test_kafka_init.txt new file mode 
100644 index 00000000000..b2f31d78fa4 --- /dev/null +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/env/kafka_test_kafka_init.txt @@ -0,0 +1 @@ +kafka-topics --create --topic ${TOPIC} --replication-factor 1 --partitions 1 --zookeeper localhost:${ZOOKEEPER_PORT} \ No newline at end of file diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql new file mode 100644 index 00000000000..8792211b7a4 --- /dev/null +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql @@ -0,0 +1,61 @@ +CREATE TABLE test_input ( + `id` INT primary key, + name STRING, + description STRING +) WITH ( + 'connector' = 'mysql-cdc-inlong', + 'hostname' = 'mysql', + 'port' = '3306', + 'username' = 'root', + 'password' = 'inlong', + 'database-name' = 'test', + 'table-name' = 'test_input', + 'scan.incremental.snapshot.enabled' = 'false', + 'jdbc.properties.useSSL' = 'false', + 'jdbc.properties.allowPublicKeyRetrieval' = 'true' +); + +CREATE TABLE kafka_load ( + `id` INT NOT NULL primary key, + name STRING, + description STRING +) WITH ( + 'connector' = 'upsert-kafka-inlong', + 'topic' = 'test-topic', + 'properties.bootstrap.servers' = 'kafka:9092', + 'key.format' = 'csv', + 'value.format' = 'csv' +); + +CREATE TABLE kafka_extract ( + `id` INT NOT NULL, + name STRING, + description STRING +) WITH ( + 'connector' = 'kafka', + 'topic' = 'test-topic', + 'properties.bootstrap.servers' = 'kafka:9092', + 'properties.group.id' = 'testGroup', + 'scan.startup.mode' = 'earliest-offset', + 'format' = 'csv' +); + +CREATE TABLE test_output ( + `id` INT primary key, + name STRING, + description STRING +) WITH ( + 'connector' = 'starrocks-inlong', + 'jdbc-url' = 'jdbc:mysql://starrocks:9030', + 'load-url'='starrocks:8030', + 'database-name'='test', + 'table-name' = 'test_output1', + 'username' = 'inlong', + 'password' = 'inlong', + 'sink.properties.format' = 'json', + 'sink.properties.strip_outer_array' = 'true', + 'sink.buffer-flush.interval-ms' = '1000' +); + +INSERT INTO kafka_load select * from test_input; +INSERT INTO test_output select * from kafka_extract; \ No newline at end of file diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 78181b233f5..393f64b776a 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -28,7 +28,7 @@ sort-connector-kafka-v1.15 jar - Apache InLong - Sort-connector-kafka-v1.15 + Apache InLong - Sort-connector-kafka ${project.parent.parent.parent.parent.parent.basedir} @@ -46,15 +46,13 @@ org.apache.inlong - audit-sdk + sort-common ${project.version} - compile org.apache.inlong - sort-common + audit-sdk ${project.version} - compile @@ -75,16 +73,18 @@ org.apache.inlong:* org.apache.kafka:* - com.google.protobuf:* org.apache.flink:flink-connector-kafka org.apache.flink:flink-connector-base - com.amazonaws:* - com.fasterxml.jackson.core:* - commons-logging:commons-logging + + org.apache.flink:flink-shaded-guava org.apache.httpcomponents:* - software.amazon.ion:* - joda-time:* org.apache.commons:commons-lang3 + com.google.protobuf:* + joda-time:* + com.fasterxml.jackson.core:* + com.amazonaws:* + software.amazon.ion:* + 
commons-logging:commons-logging @@ -105,45 +105,52 @@ org/apache/inlong/** META-INF/services/org.apache.flink.table.factories.Factory + META-INF/services/org.apache.flink.table.factories.TableFactory - org.apache.inlong.sort.base - org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.base + org.apache.kafka + org.apache.inlong.sort.kafka.shaded.org.apache.kafka - org.apache.kafka - org.apache.flink.kafka.shaded.org.apache.kafka + org.apache.commons.logging + org.apache.inlong.sort.kafka.shaded.org.apache.commons.logging - com.amazonaws - org.apache.inlong.sort.kafka.shaded.com.amazonaws + org.apache.commons.lang3 + org.apache.inlong.sort.kafka.shaded.org.apache.commons.lang3 - com.fasterxml.jackson.core - org.apache.inlong.sort.kafka.shaded.com.fasterxml.jackson.core + org.apache.http + org.apache.inlong.sort.kafka.shaded.org.apache.http + - org.apache.commons.logging - org.apache.inlong.sort.kafka.shaded.org.apache.commons.logging + com.google + org.apache.inlong.sort.kafka.shaded.com.google - org.apache.http - org.apache.inlong.sort.kafka.shaded.org.apache.http + com.amazonaws + org.apache.inlong.sort.kafka.shaded.com.amazonaws software.amazon.ion org.apache.inlong.sort.kafka.shaded.software.amazon.ion + + com.fasterxml.jackson + org.apache.inlong.sort.kafka.shaded.com.fasterxml.jackson + org.joda.time org.apache.inlong.sort.kafka.shaded.org.joda.time + - org.apache.commons.lang3 - org.apache.inlong.sort.kafka.shaded.org.apache.commons.lang3 + org.apache.inlong.sort.base + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.base org.apache.inlong.sort.configuration @@ -153,6 +160,10 @@ org.apache.inlong.sort.protocol org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.protocol + + org.apache.inlong.sort.schema + org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.schema + org.apache.inlong.sort.util org.apache.inlong.sort.kafka.shaded.org.apache.inlong.sort.util diff --git a/pom.xml b/pom.xml index e8f8697ab38..bd2daa14b1f 100644 --- a/pom.xml +++ b/pom.xml @@ -1192,6 +1192,12 @@ ${testcontainers.version} test + + org.testcontainers + jdbc + ${testcontainers.version} + test + org.testcontainers mysql @@ -1200,7 +1206,19 @@ org.testcontainers - jdbc + postgresql + ${testcontainers.version} + test + + + org.testcontainers + mongodb + ${testcontainers.version} + test + + + org.testcontainers + clickhouse ${testcontainers.version} test From 37891075fa489579c192f2d20f8a70a2af6da84c Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 6 Dec 2023 15:02:36 +0800 Subject: [PATCH 17/19] update --- .../src/test/resources/flinkSql/kafka_test.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql index 8792211b7a4..5bda3b93666 100644 --- a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/resources/flinkSql/kafka_test.sql @@ -32,7 +32,7 @@ CREATE TABLE kafka_extract ( name STRING, description STRING ) WITH ( - 'connector' = 'kafka', + 'connector' = 'kafka-inlong', 'topic' = 'test-topic', 'properties.bootstrap.servers' = 'kafka:9092', 'properties.group.id' = 'testGroup', From 9aebad3c087e26237d29d10ee67816804b4c7189 Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 6 Dec 2023 15:26:08 +0800 Subject: [PATCH 18/19] update version --- 
.../sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml index 393f64b776a..61fcc35247f 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/pom.xml @@ -23,7 +23,7 @@ org.apache.inlong sort-connectors-v1.15 - 1.10.0-SNAPSHOT + 1.11.0-SNAPSHOT sort-connector-kafka-v1.15 From 3ca6c1cc2aa130e28abf561690203a9a1ea9b65e Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 6 Dec 2023 16:17:05 +0800 Subject: [PATCH 19/19] update --- .../test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java index f76a3530786..1399fe2f6f7 100644 --- a/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java +++ b/inlong-sort/sort-end-to-end-tests/sort-end-to-end-tests-v1.15/src/test/java/org/apache/inlong/sort/tests/KafkaE2EITCase.java @@ -191,7 +191,7 @@ private String getCreateStatement(String fileName, Map propertie } /** - * Test flink sql mysql cdc to hive + * Test flink sql mysql cdc to starrocks. * * @throws Exception The exception may throw when execute the case */
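
For reference, a minimal sketch (not part of the patch series) of how the `kafka-inlong` and `upsert-kafka-inlong` factories registered above could be exercised from a Flink 1.15 Table API job. The connector identifiers and option keys mirror the kafka_test.sql added in patch 16/17; the bootstrap address, topic names, and table schema are illustrative placeholders, and the shaded sort-connector-kafka-v1.15 jar is assumed to be on the job classpath.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class KafkaInlongConnectorSketch {

    public static void main(String[] args) {
        // Streaming TableEnvironment; the connector jars only need to be on the classpath,
        // factory discovery happens through META-INF/services as wired up in this series.
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());

        // Source table backed by the "kafka-inlong" factory (placeholder broker and topic).
        tEnv.executeSql(
                "CREATE TABLE kafka_extract (\n"
                        + "  `id` INT NOT NULL,\n"
                        + "  name STRING,\n"
                        + "  description STRING\n"
                        + ") WITH (\n"
                        + "  'connector' = 'kafka-inlong',\n"
                        + "  'topic' = 'test-topic',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'properties.group.id' = 'testGroup',\n"
                        + "  'scan.startup.mode' = 'earliest-offset',\n"
                        + "  'format' = 'csv'\n"
                        + ")");

        // Sink table backed by the "upsert-kafka-inlong" factory; an upsert sink requires
        // a NOT ENFORCED primary key plus explicit key/value formats.
        tEnv.executeSql(
                "CREATE TABLE kafka_load (\n"
                        + "  `id` INT NOT NULL,\n"
                        + "  name STRING,\n"
                        + "  description STRING,\n"
                        + "  PRIMARY KEY (`id`) NOT ENFORCED\n"
                        + ") WITH (\n"
                        + "  'connector' = 'upsert-kafka-inlong',\n"
                        + "  'topic' = 'test-topic-upsert',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'key.format' = 'csv',\n"
                        + "  'value.format' = 'csv'\n"
                        + ")");

        // Submits an asynchronous streaming job copying the source topic into the upsert topic.
        tEnv.executeSql("INSERT INTO kafka_load SELECT * FROM kafka_extract");
    }
}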