diff --git a/.github/workflows/flink_cdc.yml b/.github/workflows/flink_cdc.yml index 2ccb59c8638..84d62776aba 100644 --- a/.github/workflows/flink_cdc.yml +++ b/.github/workflows/flink_cdc.yml @@ -73,12 +73,14 @@ jobs: java-version: [ '8' ] flink-version: ['1.19.1', '1.20.0'] module: [ 'pipeline_e2e' ] + parallelism: [ 1, 4 ] name: Pipeline E2E Tests uses: ./.github/workflows/flink_cdc_base.yml with: java-version: ${{ matrix.java-version }} flink-version: ${{ matrix.flink-version }} module: ${{ matrix.module }} + parallelism: ${{ matrix.parallelism }} source_e2e: strategy: matrix: diff --git a/.github/workflows/flink_cdc_base.yml b/.github/workflows/flink_cdc_base.yml index 707282fa177..ddb650ab6a0 100644 --- a/.github/workflows/flink_cdc_base.yml +++ b/.github/workflows/flink_cdc_base.yml @@ -31,6 +31,11 @@ on: description: "Flink CDC module to test against." required: true type: string + parallelism: + description: "Flink parallelism." + required: false + type: number + default: 4 custom-maven-parameter: description: "Custom maven parameter." required: false @@ -206,7 +211,7 @@ jobs: build_maven_parameter="${build_maven_parameter:+$build_maven_parameter }${{ inputs.custom-maven-parameter }}" - mvn --no-snapshot-updates -B -DskipTests -pl $compile_modules -am install && mvn --no-snapshot-updates -B $build_maven_parameter -pl $modules verify + mvn --no-snapshot-updates -B -DskipTests -pl $compile_modules -am install && mvn --no-snapshot-updates -B $build_maven_parameter -pl $modules -DspecifiedParallelism=${{ inputs.parallelism }} verify - name: Print JVM thread dumps when cancelled if: ${{ failure() }} diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AddColumnEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AddColumnEvent.java index 3966ff46ffc..bba105c58ee 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AddColumnEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AddColumnEvent.java @@ -178,4 +178,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return SchemaChangeEventType.ADD_COLUMN; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new AddColumnEvent(newTableId, addedColumns); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AlterColumnTypeEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AlterColumnTypeEvent.java index 51acb43198a..2257f134d96 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AlterColumnTypeEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/AlterColumnTypeEvent.java @@ -146,4 +146,9 @@ public boolean trimRedundantChanges() { public SchemaChangeEventType getType() { return SchemaChangeEventType.ALTER_COLUMN_TYPE; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new AlterColumnTypeEvent(newTableId, typeMapping, oldTypeMapping); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/CreateTableEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/CreateTableEvent.java index 6d3e547172d..36ef4bcc50c 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/CreateTableEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/CreateTableEvent.java @@ -77,4 +77,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return 
SchemaChangeEventType.CREATE_TABLE; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new CreateTableEvent(newTableId, schema); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DataChangeEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DataChangeEvent.java index da4d454ddee..63cade18824 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DataChangeEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DataChangeEvent.java @@ -167,6 +167,16 @@ public static DataChangeEvent projectAfter( dataChangeEvent.meta); } + /** Updates the {@link TableId} info of current data change event. */ + public static DataChangeEvent route(DataChangeEvent dataChangeEvent, TableId tableId) { + return new DataChangeEvent( + tableId, + dataChangeEvent.before, + dataChangeEvent.after, + dataChangeEvent.op, + dataChangeEvent.meta); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropColumnEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropColumnEvent.java index 76cabbde7b7..0ae5cf2f978 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropColumnEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropColumnEvent.java @@ -81,4 +81,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return SchemaChangeEventType.DROP_COLUMN; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new DropColumnEvent(newTableId, droppedColumnNames); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropTableEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropTableEvent.java index dd5efdd0818..382d9d7b61c 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropTableEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/DropTableEvent.java @@ -68,4 +68,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return SchemaChangeEventType.DROP_TABLE; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new DropTableEvent(newTableId); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/FlushEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/FlushEvent.java index 798552e0499..787e09912ad 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/FlushEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/FlushEvent.java @@ -17,6 +17,9 @@ package org.apache.flink.cdc.common.event; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + import java.util.Objects; /** @@ -25,21 +28,35 @@ */ public class FlushEvent implements Event { - /** The schema changes from which table. */ - private final TableId tableId; + /** + * The schema changes from which table. If tableId is null, it means this {@code FlushEvent} + * should flush all pending events, no matter which table it belongs to. + */ + private final @Nullable TableId tableId; /** - * Nonce code to distinguish flush events corresponding to each schema change event from - * different subTasks. 
+ * With the Schema Operator - Registry topology, a nonce code is required to distinguish flush + * events corresponding to each schema change event from different subTasks. */ private final long nonce; - public FlushEvent(TableId tableId, long nonce) { + /** With the distributed topology, we don't need to track flush events for various tables. */ + private static final FlushEvent FLUSH_ALL_EVENT = new FlushEvent(null, -1L); + + protected FlushEvent(@Nullable TableId tableId, long nonce) { this.tableId = tableId; this.nonce = nonce; } - public TableId getTableId() { + public static FlushEvent ofAll() { + return FLUSH_ALL_EVENT; + } + + public static FlushEvent of(@Nonnull TableId tableId, long nonce) { + return new FlushEvent(tableId, nonce); + } + + public @Nullable TableId getTableId() { return tableId; } @@ -66,6 +83,10 @@ public int hashCode() { @Override public String toString() { - return "FlushEvent{" + "tableId=" + tableId + ", nonce=" + nonce + '}'; + if (tableId == null) { + return "FlushEvent{ << not table-specific >> }"; + } else { + return "FlushEvent{" + "tableId=" + tableId + ", nonce=" + nonce + '}'; + } } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/RenameColumnEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/RenameColumnEvent.java index 8bde3787207..f38d1fd04d9 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/RenameColumnEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/RenameColumnEvent.java @@ -78,4 +78,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return SchemaChangeEventType.RENAME_COLUMN; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new RenameColumnEvent(newTableId, nameMapping); + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/SchemaChangeEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/SchemaChangeEvent.java index d5596e3b3de..fdb6041417c 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/SchemaChangeEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/SchemaChangeEvent.java @@ -29,4 +29,7 @@ public interface SchemaChangeEvent extends ChangeEvent, Serializable { /** Returns its {@link SchemaChangeEventType}. */ SchemaChangeEventType getType(); + + /** Creates a copy of {@link SchemaChangeEvent} with new {@link TableId}. 
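For illustration (the table names, the local variable names, and the pre-built schema are hypothetical), a routing step could rebind a schema change to a different destination table roughly like this; per the copy() implementations in this change, every field except the table id is preserved:

    SchemaChangeEvent original = new CreateTableEvent(TableId.tableId("db", "orders"), schema);
    SchemaChangeEvent routed = original.copy(TableId.tableId("db", "orders_routed"));
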
*/ + SchemaChangeEvent copy(TableId newTableId); }
diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/TruncateTableEvent.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/TruncateTableEvent.java index 2144ff2837d..29ef6d6b8e3 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/TruncateTableEvent.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/event/TruncateTableEvent.java @@ -68,4 +68,9 @@ public TableId tableId() { public SchemaChangeEventType getType() { return SchemaChangeEventType.TRUNCATE_TABLE; } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return new TruncateTableEvent(newTableId); + } }
diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/route/RouteRule.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/route/RouteRule.java index 4fbfb61b661..2974732fb1c 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/route/RouteRule.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/route/RouteRule.java @@ -24,6 +24,10 @@ public class RouteRule implements Serializable { private static final long serialVersionUID = 1L; + public RouteRule(String sourceTable, String sinkTable) { + this(sourceTable, sinkTable, null); + } + public RouteRule(String sourceTable, String sinkTable, String replaceSymbol) { this.sourceTable = sourceTable; this.sinkTable = sinkTable;
diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/source/DataSource.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/source/DataSource.java index 1027207a416..54da0a9ff5f 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/source/DataSource.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/source/DataSource.java @@ -17,6 +17,7 @@ package org.apache.flink.cdc.common.source; +import org.apache.flink.cdc.common.annotation.Experimental; import org.apache.flink.cdc.common.annotation.PublicEvolving; /** @@ -31,4 +32,18 @@ public interface DataSource { /** Get the {@link MetadataAccessor} for accessing metadata from external systems. */ MetadataAccessor getMetadataAccessor(); + + /** + * Indicates whether this source guarantees that, for each TableId, its schema will not be evolved + * differently among subTasks. If this returns {@code false}, you'll get a regular operator topology + * that is compatible with single-incremented sources like MySQL. Return {@code true} for sources + * that do not maintain a globally sequential schema change event stream, like MongoDB or Kafka. + *
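As a sketch (the class name MessageQueueDataSource is hypothetical and the other DataSource methods are elided), a source whose partitions may evolve table schemas independently would opt into the distributed topology by overriding this flag:

    public class MessageQueueDataSource implements DataSource {
        @Override
        public boolean canContainDistributedTables() {
            // Schemas may diverge across partitions, so request the distributed topology.
            return true;
        }
        // ... remaining DataSource methods omitted ...
    }
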
+ * Note that new topology still an experimental feature. Return {@code false} by default to + * avoid unexpected behaviors. + */ + @Experimental + default boolean canContainDistributedTables() { + return false; + } } diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java new file mode 100644 index 00000000000..c547eee7687 --- /dev/null +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java @@ -0,0 +1,815 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.common.utils; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.cdc.common.annotation.PublicEvolving; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; +import org.apache.flink.cdc.common.data.DecimalData; +import org.apache.flink.cdc.common.data.LocalZonedTimestampData; +import org.apache.flink.cdc.common.data.StringData; +import org.apache.flink.cdc.common.data.TimestampData; +import org.apache.flink.cdc.common.data.ZonedTimestampData; +import org.apache.flink.cdc.common.data.binary.BinaryStringData; +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.types.ArrayType; +import org.apache.flink.cdc.common.types.BigIntType; +import org.apache.flink.cdc.common.types.BinaryType; +import org.apache.flink.cdc.common.types.BooleanType; +import org.apache.flink.cdc.common.types.CharType; +import org.apache.flink.cdc.common.types.DataType; +import org.apache.flink.cdc.common.types.DataTypeFamily; +import org.apache.flink.cdc.common.types.DataTypeRoot; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.common.types.DateType; +import org.apache.flink.cdc.common.types.DecimalType; +import org.apache.flink.cdc.common.types.DoubleType; +import org.apache.flink.cdc.common.types.FloatType; +import org.apache.flink.cdc.common.types.IntType; +import org.apache.flink.cdc.common.types.LocalZonedTimestampType; +import org.apache.flink.cdc.common.types.MapType; +import org.apache.flink.cdc.common.types.RowType; +import org.apache.flink.cdc.common.types.SmallIntType; +import org.apache.flink.cdc.common.types.TimeType; +import org.apache.flink.cdc.common.types.TimestampType; +import org.apache.flink.cdc.common.types.TinyIntType; +import 
org.apache.flink.cdc.common.types.VarBinaryType; +import org.apache.flink.cdc.common.types.VarCharType; +import org.apache.flink.cdc.common.types.ZonedTimestampType; + +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableList; +import org.apache.flink.shaded.guava31.com.google.common.collect.Streams; +import org.apache.flink.shaded.guava31.com.google.common.io.BaseEncoding; + +import javax.annotation.Nullable; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Utils for merging {@link Schema}s and {@link DataType}s. Prefer using this over {@link + * SchemaUtils} to get consistent schema merging behaviors. + */ +@PublicEvolving +public class SchemaMergingUtils { + /** + * Checking if given {@code upcomingSchema} could be fit into currently known {@code + * currentSchema}. Current schema could be null (as the cold opening state, and in this case it + * always returns {@code false}) but the upcoming schema should never be null. + */ + public static boolean isSchemaCompatible( + @Nullable Schema currentSchema, Schema upcomingSchema) { + if (currentSchema == null) { + return false; + } + Map currentColumnTypes = + currentSchema.getColumns().stream() + .collect(Collectors.toMap(Column::getName, Column::getType)); + List upcomingColumns = upcomingSchema.getColumns(); + + for (Column upcomingColumn : upcomingColumns) { + String columnName = upcomingColumn.getName(); + DataType upcomingColumnType = upcomingColumn.getType(); + DataType currentColumnType = currentColumnTypes.get(columnName); + + if (!isDataTypeCompatible(currentColumnType, upcomingColumnType)) { + return false; + } + } + return true; + } + + /** + * Try to merge {@code upcomingSchema} into {@code currentSchema} by performing lenient schema + * changes. Returns evolved schema and corresponding schema change event interpretations. + */ + public static Schema getLeastCommonSchema( + @Nullable Schema currentSchema, Schema upcomingSchema) { + // No current schema record, we need to create it first. + if (currentSchema == null) { + return upcomingSchema; + } + + // Current schema is compatible with upcoming ones, just return it and perform no schema + // evolution. 
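// For instance (illustrative column names): an upcoming {id INT} fits into a current
// {id BIGINT, name STRING} schema and the current schema is returned unchanged below, whereas
// an upcoming {id BIGINT, name STRING} against a current {id INT} widens "id" to BIGINT and
// appends "name".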
+ if (isSchemaCompatible(currentSchema, upcomingSchema)) { + return currentSchema; + } + + Map newTypeMapping = new HashMap<>(); + + Map currentColumns = + currentSchema.getColumns().stream() + .collect(Collectors.toMap(Column::getName, col -> col)); + List upcomingColumns = upcomingSchema.getColumns(); + + List appendedColumns = new ArrayList<>(); + + for (Column upcomingColumn : upcomingColumns) { + String columnName = upcomingColumn.getName(); + DataType upcomingColumnType = upcomingColumn.getType(); + if (currentColumns.containsKey(columnName)) { + Column currentColumn = currentColumns.get(columnName); + DataType currentColumnType = currentColumn.getType(); + DataType leastCommonType = + getLeastCommonType(currentColumnType, upcomingColumnType); + if (!Objects.equals(leastCommonType, currentColumnType)) { + newTypeMapping.put(columnName, leastCommonType); + } + } else { + appendedColumns.add(upcomingColumn); + } + } + + List commonColumns = new ArrayList<>(); + for (Column column : currentSchema.getColumns()) { + if (newTypeMapping.containsKey(column.getName())) { + commonColumns.add(column.copy(newTypeMapping.get(column.getName()))); + } else { + commonColumns.add(column); + } + } + + commonColumns.addAll(appendedColumns); + return currentSchema.copy(commonColumns); + } + + /** Merge compatible schemas. */ + public static Schema getCommonSchema(List schemas) { + if (schemas.isEmpty()) { + return null; + } else if (schemas.size() == 1) { + return schemas.get(0); + } else { + Schema outputSchema = null; + for (Schema schema : schemas) { + outputSchema = getLeastCommonSchema(outputSchema, schema); + } + return outputSchema; + } + } + + /** + * Generating what schema change events we need to do by converting compatible {@code + * beforeSchema} to {@code afterSchema}. 
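For example (the column names and the beforeSchema / afterSchema variables are hypothetical), diffing a before-schema {id INT} against an after-schema {id BIGINT, name STRING} yields an AddColumnEvent that places "name" after "id" plus an AlterColumnTypeEvent that widens "id" from INT to BIGINT, in that order; a null before-schema collapses to a single CreateTableEvent:

    List<SchemaChangeEvent> events =
            SchemaMergingUtils.getSchemaDifference(tableId, beforeSchema, afterSchema);
    // => [AddColumnEvent{name STRING AFTER id}, AlterColumnTypeEvent{id: INT -> BIGINT}]
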
+ */ + public static List getSchemaDifference( + TableId tableId, @Nullable Schema beforeSchema, Schema afterSchema) { + if (beforeSchema == null) { + return Collections.singletonList(new CreateTableEvent(tableId, afterSchema)); + } + + Map beforeColumns = + beforeSchema.getColumns().stream() + .collect(Collectors.toMap(Column::getName, col -> col)); + + Map oldTypeMapping = new HashMap<>(); + Map newTypeMapping = new HashMap<>(); + List appendedColumns = new ArrayList<>(); + + String afterWhichColumnPosition = null; + for (Column afterColumn : afterSchema.getColumns()) { + String columnName = afterColumn.getName(); + DataType afterType = afterColumn.getType(); + if (beforeColumns.containsKey(columnName)) { + DataType beforeType = beforeColumns.get(columnName).getType(); + if (!Objects.equals(beforeType, afterType)) { + oldTypeMapping.put(columnName, beforeType); + newTypeMapping.put(columnName, afterType); + } + } else { + if (afterWhichColumnPosition == null) { + appendedColumns.add( + new AddColumnEvent.ColumnWithPosition( + afterColumn, AddColumnEvent.ColumnPosition.FIRST, null)); + } else { + appendedColumns.add( + new AddColumnEvent.ColumnWithPosition( + afterColumn, + AddColumnEvent.ColumnPosition.AFTER, + afterWhichColumnPosition)); + } + } + afterWhichColumnPosition = afterColumn.getName(); + } + + List schemaChangeEvents = new ArrayList<>(); + if (!appendedColumns.isEmpty()) { + schemaChangeEvents.add(new AddColumnEvent(tableId, appendedColumns)); + } + + if (!newTypeMapping.isEmpty()) { + schemaChangeEvents.add( + new AlterColumnTypeEvent(tableId, newTypeMapping, oldTypeMapping)); + } + + return schemaChangeEvents; + } + + /** + * Coercing {@code upcomingRow} with {@code upcomingTypes} schema into {@code currentTypes} + * schema. Invoking this method implicitly assumes that {@code isSchemaCompatible(currentSchema, + * upcomingSchema)} returns true. Otherwise, some upstream records might be lost. + */ + public static Object[] coerceRow( + String timezone, + Schema currentSchema, + Schema upcomingSchema, + List upcomingRow) { + return coerceRow(timezone, currentSchema, upcomingSchema, upcomingRow, true); + } + + /** + * Coercing {@code upcomingRow} with {@code upcomingTypes} schema into {@code currentTypes} + * schema. Invoking this method implicitly assumes that {@code isSchemaCompatible(currentSchema, + * upcomingSchema)} returns true. Otherwise, some upstream records might be lost. 
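A minimal usage sketch (column names, values, and the upcomingFields variable are illustrative): coercing the row [42, "a"] from an upcoming schema {id INT, name STRING} into a wider current schema {id BIGINT, name STRING, score DOUBLE} yields [42L, "a", null]; matching columns are passed through or converted via coerceObject, while columns absent upstream are filled with null:

    Object[] coerced =
            SchemaMergingUtils.coerceRow(
                    "UTC", currentSchema, upcomingSchema, upcomingFields, true);
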
+ */ + public static Object[] coerceRow( + String timezone, + Schema currentSchema, + Schema upcomingSchema, + List upcomingRow, + boolean toleranceMode) { + List currentColumns = currentSchema.getColumns(); + Map upcomingColumnTypes = + upcomingSchema.getColumns().stream() + .collect(Collectors.toMap(Column::getName, Column::getType)); + Map upcomingColumnObjects = + Streams.zip( + upcomingSchema.getColumnNames().stream(), + upcomingRow.stream(), + Tuple2::of) + .collect(Collectors.toMap(t -> t.f0, t -> t.f1)); + Object[] coercedRow = new Object[currentSchema.getColumnCount()]; + + for (int i = 0; i < currentSchema.getColumnCount(); i++) { + Column currentColumn = currentColumns.get(i); + String columnName = currentColumn.getName(); + if (upcomingColumnTypes.containsKey(columnName)) { + + DataType upcomingType = upcomingColumnTypes.get(columnName); + DataType currentType = currentColumn.getType(); + + if (Objects.equals(upcomingType, currentType)) { + coercedRow[i] = upcomingColumnObjects.get(columnName); + } else { + try { + coercedRow[i] = + coerceObject( + timezone, + upcomingColumnObjects.get(columnName), + upcomingColumnTypes.get(columnName), + currentColumn.getType()); + } catch (IllegalArgumentException e) { + if (!toleranceMode) { + throw e; + } + } + } + } else { + coercedRow[i] = null; + } + } + return coercedRow; + } + + @VisibleForTesting + static boolean isDataTypeCompatible(@Nullable DataType currentType, DataType upcomingType) { + // If two types are identical, they're compatible of course. + if (Objects.equals(currentType, upcomingType)) { + return true; + } + + // Or, if an upcoming column does not exist in current schema, it can't be compatible. + if (currentType == null) { + return false; + } + + // Or, check if upcomingType is presented in the type merging tree. + return TYPE_MERGING_TREE.get(upcomingType.getClass()).contains(currentType); + } + + @VisibleForTesting + static DataType getLeastCommonType(DataType currentType, DataType targetType) { + // Ignore nullability during data type merge, and restore it later + boolean nullable = currentType.isNullable() || targetType.isNullable(); + currentType = currentType.notNull(); + targetType = targetType.notNull(); + + if (Objects.equals(currentType, targetType)) { + return currentType.copy(nullable); + } + + // For TIMESTAMP and EXACT_NUMERIC types, we have fine-grained type merging logic. + if (currentType.is(DataTypeFamily.TIMESTAMP) && targetType.is(DataTypeFamily.TIMESTAMP)) { + return mergeTimestampType(currentType, targetType).copy(nullable); + } + + if (currentType instanceof DecimalType || targetType instanceof DecimalType) { + return mergeDecimalType(currentType, targetType).copy(nullable); + } + + List currentTypeTree = TYPE_MERGING_TREE.get(currentType.getClass()); + List targetTypeTree = TYPE_MERGING_TREE.get(targetType.getClass()); + + for (DataType type : currentTypeTree) { + if (targetTypeTree.contains(type)) { + return type.copy(nullable); + } + } + + // The most universal type and our final resort: STRING. 
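// Every per-type merging tree (see getTypeMergingTree below) ends with STRING, so any two
// supported types meet at STRING at the latest; e.g. a FLOAT column colliding with a BIGINT
// column shares no narrower common type and therefore degrades to STRING.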
+ return DataTypes.STRING().copy(nullable); + } + + @VisibleForTesting + static DataType mergeTimestampType(DataType lType, DataType rType) { + if (lType instanceof TimestampType && rType instanceof TimestampType) { + return DataTypes.TIMESTAMP( + Math.max( + ((TimestampType) lType).getPrecision(), + ((TimestampType) rType).getPrecision())); + } else if (lType instanceof ZonedTimestampType && rType instanceof ZonedTimestampType) { + return DataTypes.TIMESTAMP_TZ( + Math.max( + ((ZonedTimestampType) lType).getPrecision(), + ((ZonedTimestampType) rType).getPrecision())); + } else if (lType instanceof LocalZonedTimestampType + && rType instanceof LocalZonedTimestampType) { + return DataTypes.TIMESTAMP_LTZ( + Math.max( + ((LocalZonedTimestampType) lType).getPrecision(), + ((LocalZonedTimestampType) rType).getPrecision())); + } else { + return DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION); + } + } + + @VisibleForTesting + static DataType mergeDecimalType(DataType lType, DataType rType) { + if (lType instanceof DecimalType && rType instanceof DecimalType) { + // Merge two decimal types + DecimalType lhsDecimal = (DecimalType) lType; + DecimalType rhsDecimal = (DecimalType) rType; + int resultIntDigits = + Math.max( + lhsDecimal.getPrecision() - lhsDecimal.getScale(), + rhsDecimal.getPrecision() - rhsDecimal.getScale()); + int resultScale = Math.max(lhsDecimal.getScale(), rhsDecimal.getScale()); + Preconditions.checkArgument( + resultIntDigits + resultScale <= DecimalType.MAX_PRECISION, + String.format( + "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available", + lType, + rType, + resultIntDigits + resultScale, + DecimalType.MAX_PRECISION)); + return DataTypes.DECIMAL(resultIntDigits + resultScale, resultScale); + } else if (lType instanceof DecimalType && rType.is(DataTypeFamily.EXACT_NUMERIC)) { + // Merge decimal and int + return mergeExactNumericsIntoDecimal((DecimalType) lType, rType); + } else if (rType instanceof DecimalType && lType.is(DataTypeFamily.EXACT_NUMERIC)) { + // Merge decimal and int + return mergeExactNumericsIntoDecimal((DecimalType) rType, lType); + } else { + return DataTypes.STRING(); + } + } + + private static DataType mergeExactNumericsIntoDecimal( + DecimalType decimalType, DataType otherType) { + int resultPrecision = + Math.max( + decimalType.getPrecision(), + decimalType.getScale() + getNumericPrecision(otherType)); + if (resultPrecision <= DecimalType.MAX_PRECISION) { + return DataTypes.DECIMAL(resultPrecision, decimalType.getScale()); + } else { + return DataTypes.STRING(); + } + } + + @VisibleForTesting + public static int getNumericPrecision(DataType dataType) { + if (dataType.is(DataTypeFamily.EXACT_NUMERIC)) { + if (dataType.is(DataTypeRoot.TINYINT)) { + return 3; + } else if (dataType.is(DataTypeRoot.SMALLINT)) { + return 5; + } else if (dataType.is(DataTypeRoot.INTEGER)) { + return 10; + } else if (dataType.is(DataTypeRoot.BIGINT)) { + return 19; + } else if (dataType.is(DataTypeRoot.DECIMAL)) { + return ((DecimalType) dataType).getPrecision(); + } + } + + throw new IllegalArgumentException( + "Failed to get precision of non-exact decimal type " + dataType); + } + + @VisibleForTesting + static Object coerceObject( + String timezone, + Object originalField, + DataType originalType, + DataType destinationType) { + if (originalField == null) { + return null; + } + + if (destinationType instanceof BooleanType) { + return Boolean.valueOf(originalField.toString()); + } + + if (destinationType instanceof TinyIntType) { + return 
coerceToByte(originalField); + } + + if (destinationType instanceof SmallIntType) { + return coerceToShort(originalField); + } + + if (destinationType instanceof IntType) { + return coerceToInt(originalField); + } + + if (destinationType instanceof BigIntType) { + return coerceToLong(originalField); + } + + if (destinationType instanceof DecimalType) { + DecimalType decimalType = (DecimalType) destinationType; + return coerceToDecimal( + originalField, decimalType.getPrecision(), decimalType.getScale()); + } + + if (destinationType instanceof FloatType) { + return coerceToFloat(originalField); + } + + if (destinationType instanceof DoubleType) { + return coerceToDouble(originalField); + } + + if (destinationType instanceof CharType) { + return coerceToString(originalField, originalType); + } + + if (destinationType instanceof VarCharType) { + return coerceToString(originalField, originalType); + } + + if (destinationType instanceof BinaryType) { + return coerceToBytes(originalField); + } + + if (destinationType instanceof VarBinaryType) { + return coerceToBytes(originalField); + } + + if (destinationType instanceof DateType) { + try { + return coerceToLong(originalField); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + String.format("Cannot fit \"%s\" into a DATE column.", originalField)); + } + } + + if (destinationType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) + && originalType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE)) { + // For now, TimestampData / ZonedTimestampData / LocalZonedTimestampData has no + // difference in its internal representation, so there's no need to do any precision + // conversion. + return originalField; + } + + if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE) + && originalType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) { + return originalField; + } + + if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE) + && originalType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)) { + return originalField; + } + + if (destinationType instanceof TimestampType) { + return coerceToTimestamp(originalField, timezone); + } + + if (destinationType instanceof LocalZonedTimestampType) { + return coerceToLocalZonedTimestamp(originalField, timezone); + } + + if (destinationType instanceof ZonedTimestampType) { + return coerceToZonedTimestamp(originalField, timezone); + } + + throw new IllegalArgumentException( + String.format( + "Column type \"%s\" doesn't support type coercion to \"%s\"", + originalType, destinationType)); + } + + private static Object coerceToString(Object originalField, DataType originalType) { + if (originalField == null) { + return BinaryStringData.fromString("null"); + } + + if (originalType instanceof DateType) { + long epochOfDay = coerceToLong(originalField); + return BinaryStringData.fromString(LocalDate.ofEpochDay(epochOfDay).toString()); + } + + if (originalField instanceof StringData) { + return originalField; + } + + if (originalField instanceof byte[]) { + return BinaryStringData.fromString(hexlify((byte[]) originalField)); + } + + return BinaryStringData.fromString(originalField.toString()); + } + + private static Object coerceToBytes(Object originalField) { + if (originalField instanceof byte[]) { + return originalField; + } else { + return originalField.toString().getBytes(); + } + } + + private static byte coerceToByte(Object o) { + if (o instanceof Byte) { + return (Byte) o; + } else { + throw new IllegalArgumentException( + String.format("Cannot fit type \"%s\" into a TINYINT 
column. ", o.getClass())); + } + } + + private static short coerceToShort(Object o) { + if (o instanceof Byte) { + return ((Byte) o).shortValue(); + } else if (o instanceof Short) { + return (Short) o; + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a SMALLINT column. " + + "Currently only TINYINT can be accepted by a SMALLINT column", + o.getClass())); + } + } + + private static int coerceToInt(Object o) { + if (o instanceof Byte) { + return ((Byte) o).intValue(); + } else if (o instanceof Short) { + return ((Short) o).intValue(); + } else if (o instanceof Integer) { + return (Integer) o; + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a INT column. " + + "Currently only TINYINT / SMALLINT can be accepted by a INT column", + o.getClass())); + } + } + + private static long coerceToLong(Object o) { + if (o instanceof Byte) { + return ((Byte) o).longValue(); + } else if (o instanceof Short) { + return ((Short) o).longValue(); + } else if (o instanceof Integer) { + return ((Integer) o).longValue(); + } else if (o instanceof Long) { + return (long) o; + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a BIGINT column. " + + "Currently only TINYINT / SMALLINT / INT can be accepted by a BIGINT column", + o.getClass())); + } + } + + private static DecimalData coerceToDecimal(Object o, int precision, int scale) { + BigDecimal decimalValue; + if (o instanceof Byte) { + decimalValue = BigDecimal.valueOf(((Byte) o).longValue(), 0); + } else if (o instanceof Short) { + decimalValue = BigDecimal.valueOf(((Short) o).longValue(), 0); + } else if (o instanceof Integer) { + decimalValue = BigDecimal.valueOf(((Integer) o).longValue(), 0); + } else if (o instanceof Long) { + decimalValue = BigDecimal.valueOf((Long) o, 0); + } else if (o instanceof DecimalData) { + decimalValue = ((DecimalData) o).toBigDecimal(); + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a DECIMAL column. " + + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL can be accepted by a DECIMAL column", + o.getClass())); + } + return decimalValue != null + ? DecimalData.fromBigDecimal(decimalValue, precision, scale) + : null; + } + + private static float coerceToFloat(Object o) { + if (o instanceof Byte) { + return ((Byte) o).floatValue(); + } else if (o instanceof Short) { + return ((Short) o).floatValue(); + } else if (o instanceof Integer) { + return ((Integer) o).floatValue(); + } else if (o instanceof Long) { + return ((Long) o).floatValue(); + } else if (o instanceof DecimalData) { + return ((DecimalData) o).toBigDecimal().floatValue(); + } else if (o instanceof Float) { + return (Float) o; + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a FLOAT column. 
" + + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL can be accepted by a FLOAT column", + o.getClass())); + } + } + + private static double coerceToDouble(Object o) { + if (o instanceof Byte) { + return ((Byte) o).doubleValue(); + } else if (o instanceof Short) { + return ((Short) o).doubleValue(); + } else if (o instanceof Integer) { + return ((Integer) o).doubleValue(); + } else if (o instanceof Long) { + return ((Long) o).doubleValue(); + } else if (o instanceof DecimalData) { + return ((DecimalData) o).toBigDecimal().doubleValue(); + } else if (o instanceof Float) { + return ((Float) o).doubleValue(); + } else if (o instanceof Double) { + return (Double) o; + } else { + throw new IllegalArgumentException( + String.format( + "Cannot fit type \"%s\" into a DOUBLE column. " + + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL / FLOAT can be accepted by a DOUBLE column", + o.getClass())); + } + } + + private static TimestampData coerceToTimestamp(Object object, String timezone) { + if (object == null) { + return null; + } + if (object instanceof Long) { + return TimestampData.fromLocalDateTime( + LocalDate.ofEpochDay((long) object).atStartOfDay()); + } else if (object instanceof LocalZonedTimestampData) { + return TimestampData.fromLocalDateTime( + LocalDateTime.ofInstant( + ((LocalZonedTimestampData) object).toInstant(), ZoneId.of(timezone))); + } else if (object instanceof ZonedTimestampData) { + return TimestampData.fromLocalDateTime( + LocalDateTime.ofInstant( + ((ZonedTimestampData) object).toInstant(), ZoneId.of(timezone))); + } else if (object instanceof TimestampData) { + return (TimestampData) object; + } else { + throw new IllegalArgumentException( + String.format( + "Unable to implicitly coerce object `%s` as a TIMESTAMP.", object)); + } + } + + private static LocalZonedTimestampData coerceToLocalZonedTimestamp( + Object object, String timezone) { + if (object == null) { + return null; + } + + TimestampData timestampData = coerceToTimestamp(object, timezone); + return LocalZonedTimestampData.fromEpochMillis( + timestampData.getMillisecond(), timestampData.getNanoOfMillisecond()); + } + + private static ZonedTimestampData coerceToZonedTimestamp(Object object, String timezone) { + if (object == null) { + return null; + } + + TimestampData timestampData = coerceToTimestamp(object, timezone); + return ZonedTimestampData.fromZonedDateTime( + ZonedDateTime.ofInstant( + timestampData.toLocalDateTime().toInstant(ZoneOffset.UTC), + ZoneId.of(timezone))); + } + + private static String hexlify(byte[] bytes) { + return BaseEncoding.base64().encode(bytes); + } + + private static final Map, List> TYPE_MERGING_TREE = + getTypeMergingTree(); + + private static Map, List> getTypeMergingTree() { + DataType stringType = DataTypes.STRING(); + DataType doubleType = DataTypes.DOUBLE(); + DataType floatType = DataTypes.FLOAT(); + DataType decimalType = + DataTypes.DECIMAL(DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE); + DataType bigIntType = DataTypes.BIGINT(); + DataType intType = DataTypes.INT(); + DataType smallIntType = DataTypes.SMALLINT(); + DataType tinyIntType = DataTypes.TINYINT(); + DataType timestampTzType = DataTypes.TIMESTAMP_TZ(ZonedTimestampType.MAX_PRECISION); + DataType timestampLtzType = DataTypes.TIMESTAMP_LTZ(LocalZonedTimestampType.MAX_PRECISION); + DataType timestampType = DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION); + DataType dateType = DataTypes.DATE(); + + Map, List> mergingTree = new HashMap<>(); + + // Simple data types + 
mergingTree.put(VarCharType.class, ImmutableList.of(stringType)); + mergingTree.put(CharType.class, ImmutableList.of(stringType)); + mergingTree.put(BooleanType.class, ImmutableList.of(stringType)); + mergingTree.put(BinaryType.class, ImmutableList.of(stringType)); + mergingTree.put(DoubleType.class, ImmutableList.of(doubleType, stringType)); + mergingTree.put(FloatType.class, ImmutableList.of(floatType, doubleType, stringType)); + mergingTree.put(DecimalType.class, ImmutableList.of(stringType)); + mergingTree.put(BigIntType.class, ImmutableList.of(bigIntType, decimalType, stringType)); + mergingTree.put( + IntType.class, ImmutableList.of(intType, bigIntType, decimalType, stringType)); + mergingTree.put( + SmallIntType.class, + ImmutableList.of(smallIntType, intType, bigIntType, decimalType, stringType)); + mergingTree.put( + TinyIntType.class, + ImmutableList.of( + tinyIntType, smallIntType, intType, bigIntType, decimalType, stringType)); + + // Timestamp series + mergingTree.put(ZonedTimestampType.class, ImmutableList.of(timestampTzType, stringType)); + mergingTree.put( + LocalZonedTimestampType.class, + ImmutableList.of(timestampLtzType, timestampTzType, stringType)); + mergingTree.put( + TimestampType.class, + ImmutableList.of(timestampType, timestampLtzType, timestampTzType, stringType)); + mergingTree.put( + DateType.class, + ImmutableList.of( + dateType, timestampType, timestampLtzType, timestampTzType, stringType)); + mergingTree.put(TimeType.class, ImmutableList.of(stringType)); + + // Complex types + mergingTree.put(RowType.class, ImmutableList.of(stringType)); + mergingTree.put(ArrayType.class, ImmutableList.of(stringType)); + mergingTree.put(MapType.class, ImmutableList.of(stringType)); + return mergingTree; + } +} diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaUtils.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaUtils.java index 1b673b32305..51fdb46e4df 100644 --- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaUtils.java +++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/SchemaUtils.java @@ -40,7 +40,6 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -77,7 +76,7 @@ public static List createFieldGetters(List colum public static List restoreOriginalData( @Nullable RecordData recordData, List fieldGetters) { if (recordData == null) { - return Collections.emptyList(); + return null; } List actualFields = new ArrayList<>(); for (RecordData.FieldGetter fieldGetter : fieldGetters) { @@ -86,196 +85,6 @@ public static List restoreOriginalData( return actualFields; } - /** Merge compatible upstream schemas. */ - public static Schema inferWiderSchema(List schemas) { - if (schemas.isEmpty()) { - return null; - } else if (schemas.size() == 1) { - return schemas.get(0); - } else { - Schema outputSchema = null; - for (Schema schema : schemas) { - outputSchema = inferWiderSchema(outputSchema, schema); - } - return outputSchema; - } - } - - /** Try to combine two schemas with potential incompatible type. 
*/ - @VisibleForTesting - public static Schema inferWiderSchema(@Nullable Schema lSchema, Schema rSchema) { - if (lSchema == null) { - return rSchema; - } - if (lSchema.getColumnCount() != rSchema.getColumnCount()) { - throw new IllegalStateException( - String.format( - "Unable to merge schema %s and %s with different column counts.", - lSchema, rSchema)); - } - if (!lSchema.primaryKeys().equals(rSchema.primaryKeys())) { - throw new IllegalStateException( - String.format( - "Unable to merge schema %s and %s with different primary keys.", - lSchema, rSchema)); - } - if (!lSchema.partitionKeys().equals(rSchema.partitionKeys())) { - throw new IllegalStateException( - String.format( - "Unable to merge schema %s and %s with different partition keys.", - lSchema, rSchema)); - } - if (!lSchema.options().equals(rSchema.options())) { - throw new IllegalStateException( - String.format( - "Unable to merge schema %s and %s with different options.", - lSchema, rSchema)); - } - if (!Objects.equals(lSchema.comment(), rSchema.comment())) { - throw new IllegalStateException( - String.format( - "Unable to merge schema %s and %s with different comments.", - lSchema, rSchema)); - } - - List leftColumns = lSchema.getColumns(); - List rightColumns = rSchema.getColumns(); - - List mergedColumns = - IntStream.range(0, lSchema.getColumnCount()) - .mapToObj(i -> inferWiderColumn(leftColumns.get(i), rightColumns.get(i))) - .collect(Collectors.toList()); - - return lSchema.copy(mergedColumns); - } - - /** Try to combine two columns with potential incompatible type. */ - @VisibleForTesting - public static Column inferWiderColumn(Column lColumn, Column rColumn) { - if (!Objects.equals(lColumn.getName(), rColumn.getName())) { - throw new IllegalStateException( - String.format( - "Unable to merge column %s and %s with different name.", - lColumn, rColumn)); - } - if (!Objects.equals(lColumn.getComment(), rColumn.getComment())) { - throw new IllegalStateException( - String.format( - "Unable to merge column %s and %s with different comments.", - lColumn, rColumn)); - } - return lColumn.copy(inferWiderType(lColumn.getType(), rColumn.getType())); - } - - /** Try to combine given data types to a compatible wider data type. 
*/ - @VisibleForTesting - public static DataType inferWiderType(DataType lType, DataType rType) { - // Ignore nullability during data type merge - boolean nullable = lType.isNullable() || rType.isNullable(); - lType = lType.notNull(); - rType = rType.notNull(); - - DataType mergedType; - if (lType.equals(rType)) { - // identical type - mergedType = rType; - } else if (lType instanceof TimestampType && rType instanceof TimestampType) { - return DataTypes.TIMESTAMP( - Math.max( - ((TimestampType) lType).getPrecision(), - ((TimestampType) rType).getPrecision())); - } else if (lType instanceof ZonedTimestampType && rType instanceof ZonedTimestampType) { - return DataTypes.TIMESTAMP_TZ( - Math.max( - ((ZonedTimestampType) lType).getPrecision(), - ((ZonedTimestampType) rType).getPrecision())); - } else if (lType instanceof LocalZonedTimestampType - && rType instanceof LocalZonedTimestampType) { - return DataTypes.TIMESTAMP_LTZ( - Math.max( - ((LocalZonedTimestampType) lType).getPrecision(), - ((LocalZonedTimestampType) rType).getPrecision())); - } else if (lType.is(DataTypeFamily.TIMESTAMP) && rType.is(DataTypeFamily.TIMESTAMP)) { - return DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION); - } else if (lType.is(DataTypeFamily.INTEGER_NUMERIC) - && rType.is(DataTypeFamily.INTEGER_NUMERIC)) { - mergedType = DataTypes.BIGINT(); - } else if (lType.is(DataTypeFamily.CHARACTER_STRING) - && rType.is(DataTypeFamily.CHARACTER_STRING)) { - mergedType = DataTypes.STRING(); - } else if (lType.is(DataTypeFamily.APPROXIMATE_NUMERIC) - && rType.is(DataTypeFamily.APPROXIMATE_NUMERIC)) { - mergedType = DataTypes.DOUBLE(); - } else if (lType instanceof DecimalType && rType instanceof DecimalType) { - // Merge two decimal types - DecimalType lhsDecimal = (DecimalType) lType; - DecimalType rhsDecimal = (DecimalType) rType; - int resultIntDigits = - Math.max( - lhsDecimal.getPrecision() - lhsDecimal.getScale(), - rhsDecimal.getPrecision() - rhsDecimal.getScale()); - int resultScale = Math.max(lhsDecimal.getScale(), rhsDecimal.getScale()); - Preconditions.checkArgument( - resultIntDigits + resultScale <= DecimalType.MAX_PRECISION, - String.format( - "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available", - lType, - rType, - resultIntDigits + resultScale, - DecimalType.MAX_PRECISION)); - mergedType = DataTypes.DECIMAL(resultIntDigits + resultScale, resultScale); - } else if (lType instanceof DecimalType && rType.is(DataTypeFamily.EXACT_NUMERIC)) { - // Merge decimal and int - mergedType = mergeExactNumericsIntoDecimal((DecimalType) lType, rType); - } else if (rType instanceof DecimalType && lType.is(DataTypeFamily.EXACT_NUMERIC)) { - // Merge decimal and int - mergedType = mergeExactNumericsIntoDecimal((DecimalType) rType, lType); - } else { - throw new IllegalStateException( - String.format("Incompatible types: \"%s\" and \"%s\"", lType, rType)); - } - - if (nullable) { - return mergedType.nullable(); - } else { - return mergedType.notNull(); - } - } - - private static DataType mergeExactNumericsIntoDecimal( - DecimalType decimalType, DataType otherType) { - int resultPrecision = - Math.max( - decimalType.getPrecision(), - decimalType.getScale() + getNumericPrecision(otherType)); - Preconditions.checkArgument( - resultPrecision <= DecimalType.MAX_PRECISION, - String.format( - "Failed to merge %s and %s type into DECIMAL. 
%d precision digits required, %d available", - decimalType, otherType, resultPrecision, DecimalType.MAX_PRECISION)); - return DataTypes.DECIMAL(resultPrecision, decimalType.getScale()); - } - - @VisibleForTesting - public static int getNumericPrecision(DataType dataType) { - if (dataType.is(DataTypeFamily.EXACT_NUMERIC)) { - if (dataType.is(DataTypeRoot.TINYINT)) { - return 3; - } else if (dataType.is(DataTypeRoot.SMALLINT)) { - return 5; - } else if (dataType.is(DataTypeRoot.INTEGER)) { - return 10; - } else if (dataType.is(DataTypeRoot.BIGINT)) { - return 19; - } else if (dataType.is(DataTypeRoot.DECIMAL)) { - return ((DecimalType) dataType).getPrecision(); - } - } - - throw new IllegalArgumentException( - "Failed to get precision of non-exact decimal type " + dataType); - } - /** apply SchemaChangeEvent to the old schema and return the schema after changing. */ public static Schema applySchemaChangeEvent(Schema schema, SchemaChangeEvent event) { return SchemaChangeEventVisitor.visit( @@ -469,4 +278,309 @@ public static Optional transformSchemaChangeEvent( } return evolvedSchemaChangeEvent; } + + /** + * This function checks if the given schema change event has been applied already. If so, it + * will be ignored to avoid sending duplicate evolved schema change events to sink metadata + * applier. + */ + public static boolean isSchemaChangeEventRedundant( + @Nullable Schema currentSchema, SchemaChangeEvent event) { + Optional latestSchema = Optional.ofNullable(currentSchema); + return Boolean.TRUE.equals( + SchemaChangeEventVisitor.visit( + event, + addColumnEvent -> { + // It has not been applied if schema does not even exist + if (!latestSchema.isPresent()) { + return false; + } + List existedColumns = latestSchema.get().getColumns(); + + // It has been applied only if all columns are present in existedColumns + for (AddColumnEvent.ColumnWithPosition column : + addColumnEvent.getAddedColumns()) { + if (!existedColumns.contains(column.getAddColumn())) { + return false; + } + } + return true; + }, + alterColumnTypeEvent -> { + // It has not been applied if schema does not even exist + if (!latestSchema.isPresent()) { + return false; + } + Schema schema = latestSchema.get(); + + // It has been applied only if all column types are set as expected + for (Map.Entry entry : + alterColumnTypeEvent.getTypeMapping().entrySet()) { + if (!schema.getColumn(entry.getKey()).isPresent() + || !schema.getColumn(entry.getKey()) + .get() + .getType() + .equals(entry.getValue())) { + return false; + } + } + return true; + }, + createTableEvent -> { + // It has been applied if such table already exists + return latestSchema.isPresent(); + }, + dropColumnEvent -> { + // It has not been applied if schema does not even exist + if (!latestSchema.isPresent()) { + return false; + } + List existedColumnNames = latestSchema.get().getColumnNames(); + + // It has been applied only if corresponding column types do not exist + return dropColumnEvent.getDroppedColumnNames().stream() + .noneMatch(existedColumnNames::contains); + }, + dropTableEvent -> { + // It has been applied if such table does not exist + return !latestSchema.isPresent(); + }, + renameColumnEvent -> { + // It has been applied if such table already exists + if (!latestSchema.isPresent()) { + return false; + } + List existedColumnNames = latestSchema.get().getColumnNames(); + + // It has been applied only if all previous names do not exist, and all + // new names already exist + for (Map.Entry entry : + renameColumnEvent.getNameMapping().entrySet()) { + 
if (existedColumnNames.contains(entry.getKey()) + || !existedColumnNames.contains(entry.getValue())) { + return false; + } + } + return true; + }, + truncateTableEvent -> { + // We have no way to ensure if a TruncateTableEvent has been applied + // before. Just assume it's not. + return false; + })); + } + + // Schema merging related utility methods have been moved to SchemaMergingUtils class. + // The following methods have been deprecated and should not be used. + + /** + * Merge compatible schemas. + * + * @deprecated Use {@code getCommonSchema} in {@link SchemaMergingUtils} instead. + */ + @Deprecated + public static Schema inferWiderSchema(List schemas) { + if (schemas.isEmpty()) { + return null; + } else if (schemas.size() == 1) { + return schemas.get(0); + } else { + Schema outputSchema = null; + for (Schema schema : schemas) { + outputSchema = inferWiderSchema(outputSchema, schema); + } + return outputSchema; + } + } + + /** + * Try to combine two schemas with potential incompatible type. + * + * @deprecated Use {@code getLeastCommonSchema} in {@link SchemaMergingUtils} instead. + */ + @Deprecated + @VisibleForTesting + public static Schema inferWiderSchema(@Nullable Schema lSchema, Schema rSchema) { + if (lSchema == null) { + return rSchema; + } + if (lSchema.getColumnCount() != rSchema.getColumnCount()) { + throw new IllegalStateException( + String.format( + "Unable to merge schema %s and %s with different column counts.", + lSchema, rSchema)); + } + if (!lSchema.primaryKeys().equals(rSchema.primaryKeys())) { + throw new IllegalStateException( + String.format( + "Unable to merge schema %s and %s with different primary keys.", + lSchema, rSchema)); + } + if (!lSchema.partitionKeys().equals(rSchema.partitionKeys())) { + throw new IllegalStateException( + String.format( + "Unable to merge schema %s and %s with different partition keys.", + lSchema, rSchema)); + } + if (!lSchema.options().equals(rSchema.options())) { + throw new IllegalStateException( + String.format( + "Unable to merge schema %s and %s with different options.", + lSchema, rSchema)); + } + if (!Objects.equals(lSchema.comment(), rSchema.comment())) { + throw new IllegalStateException( + String.format( + "Unable to merge schema %s and %s with different comments.", + lSchema, rSchema)); + } + + List leftColumns = lSchema.getColumns(); + List rightColumns = rSchema.getColumns(); + + List mergedColumns = + IntStream.range(0, lSchema.getColumnCount()) + .mapToObj(i -> inferWiderColumn(leftColumns.get(i), rightColumns.get(i))) + .collect(Collectors.toList()); + + return lSchema.copy(mergedColumns); + } + + /** + * Try to combine two columns with potential incompatible type. + * + * @deprecated Use {@code getLeastCommonType} in {@link SchemaMergingUtils} instead. + */ + @Deprecated + @VisibleForTesting + public static Column inferWiderColumn(Column lColumn, Column rColumn) { + if (!Objects.equals(lColumn.getName(), rColumn.getName())) { + throw new IllegalStateException( + String.format( + "Unable to merge column %s and %s with different name.", + lColumn, rColumn)); + } + if (!Objects.equals(lColumn.getComment(), rColumn.getComment())) { + throw new IllegalStateException( + String.format( + "Unable to merge column %s and %s with different comments.", + lColumn, rColumn)); + } + return lColumn.copy(inferWiderType(lColumn.getType(), rColumn.getType())); + } + + /** + * Try to combine given data types to a compatible wider data type. + * + * @deprecated Use {@code getLeastCommonType} in {@link SchemaMergingUtils} instead. 
+ */ + @Deprecated + @VisibleForTesting + public static DataType inferWiderType(DataType lType, DataType rType) { + // Ignore nullability during data type merge + boolean nullable = lType.isNullable() || rType.isNullable(); + lType = lType.notNull(); + rType = rType.notNull(); + + DataType mergedType; + if (lType.equals(rType)) { + // identical type + mergedType = rType; + } else if (lType instanceof TimestampType && rType instanceof TimestampType) { + return DataTypes.TIMESTAMP( + Math.max( + ((TimestampType) lType).getPrecision(), + ((TimestampType) rType).getPrecision())); + } else if (lType instanceof ZonedTimestampType && rType instanceof ZonedTimestampType) { + return DataTypes.TIMESTAMP_TZ( + Math.max( + ((ZonedTimestampType) lType).getPrecision(), + ((ZonedTimestampType) rType).getPrecision())); + } else if (lType instanceof LocalZonedTimestampType + && rType instanceof LocalZonedTimestampType) { + return DataTypes.TIMESTAMP_LTZ( + Math.max( + ((LocalZonedTimestampType) lType).getPrecision(), + ((LocalZonedTimestampType) rType).getPrecision())); + } else if (lType.is(DataTypeFamily.TIMESTAMP) && rType.is(DataTypeFamily.TIMESTAMP)) { + return DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION); + } else if (lType.is(DataTypeFamily.INTEGER_NUMERIC) + && rType.is(DataTypeFamily.INTEGER_NUMERIC)) { + mergedType = DataTypes.BIGINT(); + } else if (lType.is(DataTypeFamily.CHARACTER_STRING) + && rType.is(DataTypeFamily.CHARACTER_STRING)) { + mergedType = DataTypes.STRING(); + } else if (lType.is(DataTypeFamily.APPROXIMATE_NUMERIC) + && rType.is(DataTypeFamily.APPROXIMATE_NUMERIC)) { + mergedType = DataTypes.DOUBLE(); + } else if (lType instanceof DecimalType && rType instanceof DecimalType) { + // Merge two decimal types + DecimalType lhsDecimal = (DecimalType) lType; + DecimalType rhsDecimal = (DecimalType) rType; + int resultIntDigits = + Math.max( + lhsDecimal.getPrecision() - lhsDecimal.getScale(), + rhsDecimal.getPrecision() - rhsDecimal.getScale()); + int resultScale = Math.max(lhsDecimal.getScale(), rhsDecimal.getScale()); + Preconditions.checkArgument( + resultIntDigits + resultScale <= DecimalType.MAX_PRECISION, + String.format( + "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available", + lType, + rType, + resultIntDigits + resultScale, + DecimalType.MAX_PRECISION)); + mergedType = DataTypes.DECIMAL(resultIntDigits + resultScale, resultScale); + } else if (lType instanceof DecimalType && rType.is(DataTypeFamily.EXACT_NUMERIC)) { + // Merge decimal and int + mergedType = mergeExactNumericsIntoDecimal((DecimalType) lType, rType); + } else if (rType instanceof DecimalType && lType.is(DataTypeFamily.EXACT_NUMERIC)) { + // Merge decimal and int + mergedType = mergeExactNumericsIntoDecimal((DecimalType) rType, lType); + } else { + throw new IllegalStateException( + String.format("Incompatible types: \"%s\" and \"%s\"", lType, rType)); + } + + if (nullable) { + return mergedType.nullable(); + } else { + return mergedType.notNull(); + } + } + + private static DataType mergeExactNumericsIntoDecimal( + DecimalType decimalType, DataType otherType) { + int resultPrecision = + Math.max( + decimalType.getPrecision(), + decimalType.getScale() + getNumericPrecision(otherType)); + Preconditions.checkArgument( + resultPrecision <= DecimalType.MAX_PRECISION, + String.format( + "Failed to merge %s and %s type into DECIMAL. 
%d precision digits required, %d available", + decimalType, otherType, resultPrecision, DecimalType.MAX_PRECISION)); + return DataTypes.DECIMAL(resultPrecision, decimalType.getScale()); + } + + @Deprecated + @VisibleForTesting + public static int getNumericPrecision(DataType dataType) { + if (dataType.is(DataTypeFamily.EXACT_NUMERIC)) { + if (dataType.is(DataTypeRoot.TINYINT)) { + return 3; + } else if (dataType.is(DataTypeRoot.SMALLINT)) { + return 5; + } else if (dataType.is(DataTypeRoot.INTEGER)) { + return 10; + } else if (dataType.is(DataTypeRoot.BIGINT)) { + return 19; + } else if (dataType.is(DataTypeRoot.DECIMAL)) { + return ((DecimalType) dataType).getPrecision(); + } + } + + throw new IllegalArgumentException( + "Failed to get precision of non-exact decimal type " + dataType); + } } diff --git a/flink-cdc-common/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java b/flink-cdc-common/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java new file mode 100644 index 00000000000..230adde5316 --- /dev/null +++ b/flink-cdc-common/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java @@ -0,0 +1,892 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.common.utils; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple4; +import org.apache.flink.cdc.common.data.DecimalData; +import org.apache.flink.cdc.common.data.LocalZonedTimestampData; +import org.apache.flink.cdc.common.data.TimestampData; +import org.apache.flink.cdc.common.data.ZonedTimestampData; +import org.apache.flink.cdc.common.data.binary.BinaryStringData; +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.types.DataType; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.common.types.DecimalType; +import org.apache.flink.cdc.common.types.LocalZonedTimestampType; +import org.apache.flink.cdc.common.types.TimestampType; +import org.apache.flink.cdc.common.types.ZonedTimestampType; + +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import javax.annotation.Nullable; + +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.apache.flink.cdc.common.types.DataTypes.DECIMAL; +import static org.apache.flink.cdc.common.types.DataTypes.VARCHAR; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.coerceObject; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.coerceRow; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.getLeastCommonSchema; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.getSchemaDifference; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.isDataTypeCompatible; +import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.isSchemaCompatible; + +/** A test for the {@link SchemaMergingUtils}. 
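+ * Covers schema compatibility checks ({@code isSchemaCompatible}, {@code isDataTypeCompatible}),
+ * least-common-schema merging ({@code getLeastCommonSchema}), schema differencing
+ * ({@code getSchemaDifference}) and value coercion ({@code coerceObject}, {@code coerceRow}).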
*/ +class SchemaMergingUtilsTest { + + private static final TableId TABLE_ID = TableId.tableId("foo", "bar", "baz"); + + private static final DataType CHAR = DataTypes.CHAR(17); + private static final DataType VARCHAR = DataTypes.VARCHAR(17); + private static final DataType STRING = DataTypes.STRING(); + + private static final DataType BOOLEAN = DataTypes.BOOLEAN(); + private static final DataType BINARY = DataTypes.BINARY(17); + private static final DataType SMALLINT = DataTypes.SMALLINT(); + private static final DataType TINYINT = DataTypes.TINYINT(); + private static final DataType INT = DataTypes.INT(); + private static final DataType BIGINT = DataTypes.BIGINT(); + private static final DataType DECIMAL = + DECIMAL(DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE); + private static final DataType FLOAT = DataTypes.FLOAT(); + private static final DataType DOUBLE = DataTypes.DOUBLE(); + + private static final DataType TIMESTAMP_TZ = + DataTypes.TIMESTAMP_TZ(ZonedTimestampType.MAX_PRECISION); + private static final DataType TIMESTAMP_LTZ = + DataTypes.TIMESTAMP_LTZ(LocalZonedTimestampType.MAX_PRECISION); + private static final DataType TIMESTAMP = DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION); + private static final DataType DATE = DataTypes.DATE(); + private static final DataType TIME = DataTypes.TIME(); + + private static final DataType ROW = DataTypes.ROW(INT, STRING); + private static final DataType ARRAY = DataTypes.ARRAY(STRING); + private static final DataType MAP = DataTypes.MAP(INT, STRING); + + private static final Map DUMMY_OBJECTS = + ImmutableMap.of( + TINYINT, + (byte) 17, + SMALLINT, + (short) 17, + INT, + 17, + BIGINT, + 17L, + DECIMAL, + decOf(17), + FLOAT, + 17.0f, + DOUBLE, + 17.0); + + @Test + void testIsSchemaCompatible() { + Assertions.assertThat(isSchemaCompatible(null, of("id", BIGINT, "name", VARCHAR(17)))) + .as("test merging into an empty schema") + .isFalse(); + + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test identical schema") + .isTrue(); + + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "name", VARCHAR(17)), + of("name", VARCHAR(17), "id", BIGINT))) + .as("swapping sequence is ok") + .isTrue(); + + Assertions.assertThat( + isSchemaCompatible(of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT))) + .as("test a wider upcoming schema") + .isTrue(); + + Assertions.assertThat( + isSchemaCompatible(of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a narrower upcoming schema") + .isFalse(); + + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "name", STRING), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a wider typed upcoming schema") + .isTrue(); + + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", STRING))) + .as("test a narrower typed upcoming schema") + .isFalse(); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", BIGINT), + of("id", BIGINT, "number", type))) + .as("test fitting %s into BIGINT", type) + .isTrue()); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", BIGINT))) + .as("test fitting BIGINT into %s", type) + .isFalse()); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING) + .forEach( + type -> + 
Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", STRING), + of("id", BIGINT, "number", type))) + .as("test fitting %s into STRING", type) + .isTrue()); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL) + .forEach( + type -> + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", STRING))) + .as("test fitting STRING into %s", type) + .isFalse()); + + Stream.of(FLOAT, DOUBLE, STRING) + .forEach( + type -> + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", STRING), + of("id", BIGINT, "number", type))) + .as("test fitting %s into STRING", type) + .isTrue()); + + Stream.of(FLOAT, DOUBLE) + .forEach( + type -> + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", STRING))) + .as("test fitting STRING into %s", type) + .isFalse()); + + Assertions.assertThat( + isSchemaCompatible( + of("id", BIGINT, "foo", INT), of("id", BIGINT, "bar", INT))) + .as("columns with different names") + .isFalse(); + } + + @Test + void testGetLeastCommonSchema() { + Assertions.assertThat(getLeastCommonSchema(null, of("id", BIGINT, "name", VARCHAR(17)))) + .as("test merging into an empty schema") + .isEqualTo(of("id", BIGINT, "name", VARCHAR(17))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test identical schema") + .isEqualTo(of("id", BIGINT, "name", VARCHAR(17))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "name", VARCHAR(17)), + of("name", VARCHAR(17), "id", BIGINT))) + .as("swapping sequence is ok") + .isEqualTo(of("id", BIGINT, "name", VARCHAR(17))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT))) + .as("test a wider upcoming schema") + .isEqualTo(of("id", BIGINT, "name", VARCHAR(17))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a narrower upcoming schema") + .isEqualTo(of("id", BIGINT, "name", VARCHAR(17))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "name", STRING), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a wider typed upcoming schema") + .isEqualTo(of("id", BIGINT, "name", STRING)); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", STRING))) + .as("test a narrower typed upcoming schema") + .isEqualTo(of("id", BIGINT, "name", STRING)); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "number", BIGINT), + of("id", BIGINT, "number", type))) + .as("test fitting %s into BIGINT", type) + .isEqualTo(of("id", BIGINT, "number", BIGINT))); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", BIGINT))) + .as("test fitting BIGINT into %s", type) + .isEqualTo(of("id", BIGINT, "number", BIGINT))); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING) + .forEach( + type -> + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "number", STRING), + of("id", BIGINT, "number", type))) + .as("test fitting %s into STRING", type) + .isEqualTo(of("id", BIGINT, "number", STRING))); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING) + .forEach( + type -> + 
Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", STRING))) + .as("test fitting STRING into %s", type) + .isEqualTo(of("id", BIGINT, "number", STRING))); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "foo", INT), of("id", BIGINT, "bar", INT))) + .as("columns with different names") + .isEqualTo(of("id", BIGINT, "foo", INT, "bar", INT)); + + Assertions.assertThat( + getLeastCommonSchema( + of("id", BIGINT, "foo", INT, "baz", FLOAT), + of("id", BIGINT, "bar", INT, "baz", DOUBLE))) + .as("mixed schema differences") + .isEqualTo(of("id", BIGINT, "foo", INT, "baz", DOUBLE, "bar", INT)); + } + + @Test + void testGetSchemaDifference() { + Assertions.assertThat( + getSchemaDifference(TABLE_ID, null, of("id", BIGINT, "name", VARCHAR(17)))) + .as("test merging into an empty schema") + .containsExactly( + new CreateTableEvent(TABLE_ID, of("id", BIGINT, "name", VARCHAR(17)))); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test identical schema") + .isEmpty(); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "name", VARCHAR(17)), + of("name", VARCHAR(17), "id", BIGINT))) + .as("swapping sequence is ok") + .isEmpty(); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a widening upcoming schema") + .containsExactly( + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("name", VARCHAR(17)), + AddColumnEvent.ColumnPosition.AFTER, + "id")))); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, of("id", BIGINT), of("name", VARCHAR(17), "id", BIGINT))) + .as("test a widening upcoming schema at first") + .containsExactly( + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("name", VARCHAR(17)), + AddColumnEvent.ColumnPosition.FIRST, + null)))); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", STRING))) + .as("test a type-widening typed upcoming schema") + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("name", STRING), + Collections.singletonMap("name", VARCHAR(17)))); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", BIGINT))) + .as("test escalating %s to BIGINT", type) + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("number", BIGINT), + Collections.singletonMap("number", type)))); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT) + .forEach( + type -> + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", DOUBLE))) + .as("test escalating %s to DOUBLE", type) + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("number", DOUBLE), + Collections.singletonMap("number", type)))); + + Assertions.assertThat( + getSchemaDifference( + TABLE_ID, + of("id", BIGINT, "foo", INT, "baz", FLOAT), + of("id", BIGINT, "foo", BIGINT, "bar", INT, "baz", DOUBLE))) + .as("mixed schema differences") + .containsExactly( + new AddColumnEvent( + TABLE_ID, + 
Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("bar", INT), + AddColumnEvent.ColumnPosition.AFTER, + "foo"))), + new AlterColumnTypeEvent( + TABLE_ID, + ImmutableMap.of("foo", BIGINT, "baz", DOUBLE), + ImmutableMap.of("foo", INT, "baz", FLOAT))); + } + + @Test + void testMergeAndDiff() { + Assertions.assertThat(mergeAndDiff(null, of("id", BIGINT, "name", VARCHAR(17)))) + .as("test merging into an empty schema") + .containsExactly( + new CreateTableEvent(TABLE_ID, of("id", BIGINT, "name", VARCHAR(17)))); + + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test identical schema") + .isEmpty(); + + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "name", VARCHAR(17)), + of("name", VARCHAR(17), "id", BIGINT))) + .as("swapping sequence is ok") + .isEmpty(); + + Assertions.assertThat(mergeAndDiff(of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT))) + .as("test a wider upcoming schema") + .isEmpty(); + + Assertions.assertThat(mergeAndDiff(of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a narrower upcoming schema") + .containsExactly( + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("name", VARCHAR(17)), + AddColumnEvent.ColumnPosition.AFTER, + "id")))); + + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "name", STRING), + of("id", BIGINT, "name", VARCHAR(17)))) + .as("test a wider typed upcoming schema") + .isEmpty(); + + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", STRING))) + .as("test a narrower typed upcoming schema") + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("name", STRING), + Collections.singletonMap("name", VARCHAR(17)))); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "number", BIGINT), + of("id", BIGINT, "number", type))) + .as("test fitting %s into BIGINT", type) + .isEmpty()); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", BIGINT))) + .as("test fitting BIGINT into %s", type) + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("number", BIGINT), + Collections.singletonMap("number", type)))); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING) + .forEach( + type -> + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "number", STRING), + of("id", BIGINT, "number", type))) + .as("test fitting %s into STRING", type) + .isEmpty()); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL) + .forEach( + type -> + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "number", type), + of("id", BIGINT, "number", STRING))) + .as("test fitting STRING into %s", type) + .containsExactly( + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("number", STRING), + Collections.singletonMap("number", type)))); + + Assertions.assertThat( + mergeAndDiff( + of("id", BIGINT, "foo", INT, "baz", FLOAT), + of("id", BIGINT, "bar", INT, "baz", DOUBLE))) + .as("mixed schema differences") + .containsExactly( + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("bar", INT), + AddColumnEvent.ColumnPosition.AFTER, + "baz"))), + new 
AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("baz", DOUBLE), + Collections.singletonMap("baz", FLOAT))); + } + + @Test + void testIsDataTypeCompatible() { + List> viableConversions = + Arrays.asList( + Tuple2.of(CHAR, STRING), + Tuple2.of(VARCHAR, STRING), + Tuple2.of(BOOLEAN, STRING), + Tuple2.of(BINARY, STRING), + Tuple2.of(DOUBLE, STRING), + Tuple2.of(FLOAT, STRING), + Tuple2.of(DECIMAL, STRING), + Tuple2.of(BIGINT, STRING), + Tuple2.of(INT, STRING), + Tuple2.of(SMALLINT, STRING), + Tuple2.of(TINYINT, STRING), + Tuple2.of(TIMESTAMP_TZ, STRING), + Tuple2.of(TIMESTAMP_LTZ, STRING), + Tuple2.of(TIMESTAMP, STRING), + Tuple2.of(DATE, STRING), + Tuple2.of(TIME, STRING), + Tuple2.of(ROW, STRING), + Tuple2.of(ARRAY, STRING), + Tuple2.of(MAP, STRING), + Tuple2.of(TINYINT, SMALLINT), + Tuple2.of(SMALLINT, INT), + Tuple2.of(INT, BIGINT), + Tuple2.of(BIGINT, DECIMAL), + Tuple2.of(DECIMAL, STRING), + Tuple2.of(FLOAT, DOUBLE), + Tuple2.of(DATE, TIMESTAMP), + Tuple2.of(TIMESTAMP, TIMESTAMP_LTZ), + Tuple2.of(TIMESTAMP_LTZ, TIMESTAMP_TZ)); + + List> infeasibleConversions = + Arrays.asList( + Tuple2.of(CHAR, BOOLEAN), + Tuple2.of(BOOLEAN, BINARY), + Tuple2.of(BINARY, DOUBLE), + Tuple2.of(DOUBLE, TIMESTAMP_TZ), + Tuple2.of(TIMESTAMP_TZ, TIME), + Tuple2.of(TIME, ROW), + Tuple2.of(ROW, ARRAY), + Tuple2.of(ARRAY, MAP)); + + viableConversions.forEach( + conv -> + Assertions.assertThat(isDataTypeCompatible(conv.f1, conv.f0)) + .as("test fitting %s into %s", conv.f0, conv.f1) + .isTrue()); + + viableConversions.forEach( + conv -> + Assertions.assertThat(isDataTypeCompatible(conv.f0, conv.f1)) + .as("test fitting %s into %s", conv.f1, conv.f0) + .isFalse()); + + infeasibleConversions.forEach( + conv -> + Assertions.assertThat(isDataTypeCompatible(conv.f1, conv.f0)) + .as("test fitting %s into %s", conv.f0, conv.f1) + .isFalse()); + + infeasibleConversions.forEach( + conv -> + Assertions.assertThat(isDataTypeCompatible(conv.f0, conv.f1)) + .as("test fitting %s into %s", conv.f1, conv.f0) + .isFalse()); + } + + @Test + void testCoerceObject() { + Stream> conversionExpects = + Stream.of( + // From TINYINT + Tuple4.of(TINYINT, (byte) 0, TINYINT, (byte) 0), + Tuple4.of(TINYINT, (byte) 1, SMALLINT, (short) 1), + Tuple4.of(TINYINT, (byte) 2, INT, 2), + Tuple4.of(TINYINT, (byte) 3, BIGINT, 3L), + Tuple4.of(TINYINT, (byte) 4, DECIMAL, decOf(4)), + Tuple4.of(TINYINT, (byte) 5, FLOAT, 5.0f), + Tuple4.of(TINYINT, (byte) 6, DOUBLE, 6.0), + Tuple4.of(TINYINT, (byte) 7, STRING, binStrOf("7")), + + // From SMALLINT + Tuple4.of(SMALLINT, (short) 1, SMALLINT, (short) 1), + Tuple4.of(SMALLINT, (short) 2, INT, 2), + Tuple4.of(SMALLINT, (short) 3, BIGINT, 3L), + Tuple4.of(SMALLINT, (short) 4, DECIMAL, decOf(4)), + Tuple4.of(SMALLINT, (short) 5, FLOAT, 5.0f), + Tuple4.of(SMALLINT, (short) 6, DOUBLE, 6.0), + Tuple4.of(SMALLINT, (short) 7, STRING, binStrOf("7")), + + // From INT + Tuple4.of(INT, 2, INT, 2), + Tuple4.of(INT, 3, BIGINT, 3L), + Tuple4.of(INT, 4, DECIMAL, decOf(4)), + Tuple4.of(INT, 5, FLOAT, 5.0f), + Tuple4.of(INT, 6, DOUBLE, 6.0), + Tuple4.of(INT, 7, STRING, binStrOf("7")), + + // From BIGINT + Tuple4.of(BIGINT, 3L, BIGINT, 3L), + Tuple4.of(BIGINT, 4L, DECIMAL, decOf(4)), + Tuple4.of(BIGINT, 5L, FLOAT, 5.0f), + Tuple4.of(BIGINT, 6L, DOUBLE, 6.0), + Tuple4.of(BIGINT, 7L, STRING, binStrOf("7")), + + // From DECIMAL + Tuple4.of(DECIMAL, decOf(4), DECIMAL, decOf(4)), + Tuple4.of(DECIMAL, decOf(5), FLOAT, 5.0f), + Tuple4.of(DECIMAL, decOf(6), DOUBLE, 6.0), + Tuple4.of(DECIMAL, decOf(7), STRING, binStrOf("7")), 
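+                        // (Each Tuple4 reads as <sourceType, input, targetType, expectedOutput>,
+                        // mirroring the coerceObject(zone, input, sourceType, targetType) call
+                        // in the assertion at the end of this test.)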
+ + // From FLOAT + Tuple4.of(FLOAT, 5.0f, FLOAT, 5.0f), + Tuple4.of(FLOAT, 6.0f, DOUBLE, 6.0), + Tuple4.of(FLOAT, 7.0f, STRING, binStrOf("7.0")), + + // From DOUBLE + Tuple4.of(DOUBLE, 6.0f, DOUBLE, 6.0), + Tuple4.of(DOUBLE, 7.0f, STRING, binStrOf("7.0")), + + // From STRING + Tuple4.of(STRING, binStrOf("AtoZ"), STRING, binStrOf("AtoZ")), + Tuple4.of(STRING, binStrOf("lie"), STRING, binStrOf("lie")), + + // From CHAR + Tuple4.of( + CHAR, binStrOf("les miserables"), CHAR, binStrOf("les miserables")), + Tuple4.of(CHAR, binStrOf("notre dame"), STRING, binStrOf("notre dame")), + + // From Binary + Tuple4.of(BINARY, binOf("les miserables"), BINARY, binOf("les miserables")), + Tuple4.of( + BINARY, binOf("notre dame"), STRING, binStrOf("bm90cmUgZGFtZQ==")), + + // From BOOLEAN + Tuple4.of(BOOLEAN, true, BOOLEAN, true), + Tuple4.of(BOOLEAN, false, BOOLEAN, false), + Tuple4.of(BOOLEAN, true, STRING, binStrOf("true")), + Tuple4.of(BOOLEAN, false, STRING, binStrOf("false")), + + // From DATE + Tuple4.of(DATE, dateOf(2017, 1, 1), DATE, dateOf(2017, 1, 1)), + Tuple4.of(DATE, dateOf(2018, 2, 2), TIMESTAMP, tsOf("2018", "02", "02")), + Tuple4.of( + DATE, + dateOf(2019, 3, 3), + TIMESTAMP_LTZ, + ltzTsOf("2019", "03", "03")), + Tuple4.of( + DATE, dateOf(2020, 4, 4), TIMESTAMP_TZ, zTsOf("2020", "04", "04")), + Tuple4.of(DATE, dateOf(2021, 5, 5), STRING, binStrOf("2021-05-05")), + + // From TIMESTAMP + Tuple4.of( + TIMESTAMP, + tsOf("2022", "06", "06"), + TIMESTAMP, + tsOf("2022", "06", "06")), + Tuple4.of( + TIMESTAMP, + tsOf("2023", "07", "07"), + TIMESTAMP_LTZ, + ltzTsOf("2023", "07", "07")), + Tuple4.of( + TIMESTAMP, + tsOf("2024", "08", "08"), + TIMESTAMP_TZ, + zTsOf("2024", "08", "08")), + Tuple4.of( + TIMESTAMP, + tsOf("2025", "09", "09"), + STRING, + binStrOf("2025-09-09T00:00")), + + // From TIMESTAMP_LTZ + Tuple4.of( + TIMESTAMP_LTZ, + ltzTsOf("2026", "10", "10"), + TIMESTAMP_LTZ, + ltzTsOf("2026", "10", "10")), + Tuple4.of( + TIMESTAMP_LTZ, + ltzTsOf("2027", "11", "11"), + TIMESTAMP_TZ, + zTsOf("2027", "11", "11")), + Tuple4.of( + TIMESTAMP_LTZ, + ltzTsOf("2028", "12", "12"), + STRING, + binStrOf("2028-12-12T00:00")), + + // From TIMESTAMP_TZ + Tuple4.of( + TIMESTAMP_TZ, + zTsOf("2018", "01", "01"), + TIMESTAMP_TZ, + zTsOf("2018", "01", "01")), + Tuple4.of( + TIMESTAMP_TZ, + zTsOf("2019", "02", "02"), + STRING, + binStrOf("2019-02-02T00:00:00Z"))); + + conversionExpects.forEach( + rule -> + Assertions.assertThat(coerceObject("UTC", rule.f1, rule.f0, rule.f2)) + .as("Try coercing %s (%s) to %s type", rule.f1, rule.f0, rule.f2) + .isEqualTo(rule.f3)); + } + + @Test + void testCoerceRow() { + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT, "name", VARCHAR(17)), + Arrays.asList(2L, binStrOf("Bob")))) + .as("test identical schema") + .containsExactly(2L, binStrOf("Bob")); + + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "name", VARCHAR(17)), + of("name", VARCHAR(17), "id", BIGINT), + Arrays.asList(binStrOf("Cecily"), 3L))) + .as("swapping sequence is ok") + .containsExactly(3L, binStrOf("Cecily")); + + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "name", VARCHAR(17)), + of("id", BIGINT), + Collections.singletonList(4L))) + .as("test a wider upcoming schema") + .containsExactly(4L, null); + + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "name", STRING), + of("id", BIGINT, "name", VARCHAR(17)), + Arrays.asList(4L, "Derrida"))) + .as("test a wider typed upcoming schema") + 
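+                // Note: the plain Java String in the input row is expected to come back as
+                // BinaryStringData once the VARCHAR(17) column is widened to STRING.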
.containsExactly(4L, binStrOf("Derrida")); + + Stream.of(TINYINT, SMALLINT, INT) + .forEach( + type -> + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "number", BIGINT), + of("id", BIGINT, "number", type), + Arrays.asList(5L, DUMMY_OBJECTS.get(type)))) + .as("test fitting %s into BIGINT", type) + .containsExactly(5L, 17L)); + + Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT) + .forEach( + type -> + Assertions.assertThat( + coerceRow( + "UTC", + of("id", BIGINT, "number", DOUBLE), + of("id", BIGINT, "number", type), + Arrays.asList(6L, DUMMY_OBJECTS.get(type)))) + .as("test fitting %s into DOUBLE", type) + .containsExactly(6L, 17.0)); + } + + // Some testing utility methods. + + private static List mergeAndDiff( + @Nullable Schema currentSchema, Schema upcomingSchema) { + Schema afterSchema = getLeastCommonSchema(currentSchema, upcomingSchema); + return getSchemaDifference(TABLE_ID, currentSchema, afterSchema); + } + + private static Schema of(Object... args) { + List argList = new ArrayList<>(Arrays.asList(args)); + Preconditions.checkState(argList.size() % 2 == 0); + Schema.Builder builder = Schema.newBuilder(); + while (!argList.isEmpty()) { + String colName = (String) argList.remove(0); + DataType colType = (DataType) argList.remove(0); + builder.physicalColumn(colName, colType); + } + return builder.build(); + } + + private static long dateOf(int year, int month, int dayOfMonth) { + return LocalDate.of(year, month, dayOfMonth).toEpochDay(); + } + + private static TimestampData tsOf(String year, String month, String dayOfMonth) { + return TimestampData.fromTimestamp( + Timestamp.valueOf(String.format("%s-%s-%s 00:00:00", year, month, dayOfMonth))); + } + + private static LocalZonedTimestampData ltzTsOf(String year, String month, String dayOfMonth) { + return LocalZonedTimestampData.fromEpochMillis( + Instant.parse(String.format("%s-%s-%sT00:00:00Z", year, month, dayOfMonth)) + .toEpochMilli()); + } + + private static ZonedTimestampData zTsOf(String year, String month, String dayOfMonth) { + return ZonedTimestampData.fromZonedDateTime( + ZonedDateTime.ofInstant( + Instant.parse(String.format("%s-%s-%sT00:00:00Z", year, month, dayOfMonth)), + ZoneId.of("UTC"))); + } + + private static DecimalData decOf(long value) { + return DecimalData.fromBigDecimal( + BigDecimal.valueOf(value), DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE); + } + + private static BinaryStringData binStrOf(String str) { + return BinaryStringData.fromString(str); + } + + private static byte[] binOf(String str) { + return str.getBytes(); + } +} diff --git a/flink-cdc-composer/pom.xml b/flink-cdc-composer/pom.xml index 5971f3b459a..abba69f13aa 100644 --- a/flink-cdc-composer/pom.xml +++ b/flink-cdc-composer/pom.xml @@ -44,6 +44,12 @@ limitations under the License. 
${project.version} test + + org.apache.flink + flink-cdc-pipeline-connector-stimps + ${project.version} + test + org.apache.flink flink-clients diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposer.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposer.java index 579eb960759..b6774a6c175 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposer.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposer.java @@ -23,15 +23,18 @@ import org.apache.flink.cdc.common.pipeline.PipelineOptions; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.sink.DataSink; +import org.apache.flink.cdc.common.source.DataSource; import org.apache.flink.cdc.composer.PipelineComposer; import org.apache.flink.cdc.composer.PipelineExecution; import org.apache.flink.cdc.composer.definition.PipelineDef; import org.apache.flink.cdc.composer.flink.coordination.OperatorIDGenerator; import org.apache.flink.cdc.composer.flink.translator.DataSinkTranslator; import org.apache.flink.cdc.composer.flink.translator.DataSourceTranslator; +import org.apache.flink.cdc.composer.flink.translator.DistributedSchemaOperatorTranslator; import org.apache.flink.cdc.composer.flink.translator.PartitioningTranslator; import org.apache.flink.cdc.composer.flink.translator.SchemaOperatorTranslator; import org.apache.flink.cdc.composer.flink.translator.TransformTranslator; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; import org.apache.flink.cdc.runtime.serializer.event.EventSerializer; import org.apache.flink.configuration.DeploymentOptions; import org.apache.flink.streaming.api.datastream.DataStream; @@ -94,76 +97,120 @@ public PipelineExecution compose(PipelineDef pipelineDef) { int parallelism = pipelineDefConfig.get(PipelineOptions.PIPELINE_PARALLELISM); env.getConfig().setParallelism(parallelism); + translate(env, pipelineDef); + + // Add framework JARs + addFrameworkJars(); + + return new FlinkPipelineExecution( + env, pipelineDefConfig.get(PipelineOptions.PIPELINE_NAME), isBlocking); + } + + private void translate(StreamExecutionEnvironment env, PipelineDef pipelineDef) { + Configuration pipelineDefConfig = pipelineDef.getConfig(); + int parallelism = pipelineDefConfig.get(PipelineOptions.PIPELINE_PARALLELISM); SchemaChangeBehavior schemaChangeBehavior = pipelineDefConfig.get(PipelineOptions.PIPELINE_SCHEMA_CHANGE_BEHAVIOR); - // Build Source Operator + // Initialize translators DataSourceTranslator sourceTranslator = new DataSourceTranslator(); - DataStream stream = - sourceTranslator.translate( - pipelineDef.getSource(), env, pipelineDefConfig, parallelism); - - // Build PreTransformOperator for processing Schema Event TransformTranslator transformTranslator = new TransformTranslator(); - stream = - transformTranslator.translatePreTransform( - stream, - pipelineDef.getTransforms(), - pipelineDef.getUdfs(), - pipelineDef.getModels()); - - // Schema operator + PartitioningTranslator partitioningTranslator = new PartitioningTranslator(); SchemaOperatorTranslator schemaOperatorTranslator = new SchemaOperatorTranslator( schemaChangeBehavior, pipelineDefConfig.get(PipelineOptions.PIPELINE_SCHEMA_OPERATOR_UID), pipelineDefConfig.get(PipelineOptions.PIPELINE_SCHEMA_OPERATOR_RPC_TIMEOUT), pipelineDefConfig.get(PipelineOptions.PIPELINE_LOCAL_TIME_ZONE)); + DistributedSchemaOperatorTranslator 
distributedSchemaOperatorTranslator = + new DistributedSchemaOperatorTranslator( + pipelineDefConfig.get(PipelineOptions.PIPELINE_SCHEMA_OPERATOR_UID), + pipelineDefConfig.get(PipelineOptions.PIPELINE_SCHEMA_OPERATOR_RPC_TIMEOUT), + pipelineDefConfig.get(PipelineOptions.PIPELINE_LOCAL_TIME_ZONE)); + DataSinkTranslator sinkTranslator = new DataSinkTranslator(); + + // And required constructors OperatorIDGenerator schemaOperatorIDGenerator = new OperatorIDGenerator(schemaOperatorTranslator.getSchemaOperatorUid()); + DataSource dataSource = + sourceTranslator.createDataSource(pipelineDef.getSource(), pipelineDefConfig, env); + DataSink dataSink = + sinkTranslator.createDataSink(pipelineDef.getSink(), pipelineDefConfig, env); + + // O ---> Source + DataStream stream = + sourceTranslator.translate( + pipelineDef.getSource(), dataSource, env, pipelineDefConfig, parallelism); - // Build PostTransformOperator for processing Data Event + // Source ---> PreTransform stream = - transformTranslator.translatePostTransform( + transformTranslator.translatePreTransform( stream, pipelineDef.getTransforms(), - pipelineDef.getConfig().get(PipelineOptions.PIPELINE_LOCAL_TIME_ZONE), pipelineDef.getUdfs(), pipelineDef.getModels()); - // Build DataSink in advance as schema operator requires MetadataApplier - DataSinkTranslator sinkTranslator = new DataSinkTranslator(); - DataSink dataSink = - sinkTranslator.createDataSink(pipelineDef.getSink(), pipelineDefConfig, env); - + // PreTransform ---> PostTransform stream = - schemaOperatorTranslator.translate( - stream, - parallelism, - dataSink.getMetadataApplier() - .setAcceptedSchemaEvolutionTypes( - pipelineDef.getSink().getIncludedSchemaEvolutionTypes()), - pipelineDef.getRoute()); - - // Build Partitioner used to shuffle Event - PartitioningTranslator partitioningTranslator = new PartitioningTranslator(); - stream = - partitioningTranslator.translate( + transformTranslator.translatePostTransform( stream, - parallelism, - parallelism, - schemaOperatorIDGenerator.generate(), - dataSink.getDataChangeEventHashFunctionProvider(parallelism)); - - // Build Sink Operator - sinkTranslator.translate( - pipelineDef.getSink(), stream, dataSink, schemaOperatorIDGenerator.generate()); - - // Add framework JARs - addFrameworkJars(); + pipelineDef.getTransforms(), + pipelineDef.getConfig().get(PipelineOptions.PIPELINE_LOCAL_TIME_ZONE), + pipelineDef.getUdfs(), + pipelineDef.getModels()); - return new FlinkPipelineExecution( - env, pipelineDefConfig.get(PipelineOptions.PIPELINE_NAME), isBlocking); + if (dataSource.canContainDistributedTables()) { + // Translate a distributed topology for sources with distributed tables + // PostTransform -> Partitioning + DataStream partitionedStream = + partitioningTranslator.translateDistributed( + stream, + parallelism, + parallelism, + dataSink.getDataChangeEventHashFunctionProvider(parallelism)); + + // Partitioning -> Schema Operator + stream = + distributedSchemaOperatorTranslator.translate( + partitionedStream, + parallelism, + dataSink.getMetadataApplier() + .setAcceptedSchemaEvolutionTypes( + pipelineDef + .getSink() + .getIncludedSchemaEvolutionTypes()), + pipelineDef.getRoute()); + + // Schema Operator -> Sink + sinkTranslator.translate( + pipelineDef.getSink(), stream, dataSink, schemaOperatorIDGenerator.generate()); + } else { + // Translate a regular topology for sources without distributed tables + // PostTransform ---> Schema Operator + stream = + schemaOperatorTranslator.translate( + stream, + parallelism, + 
dataSink.getMetadataApplier() + .setAcceptedSchemaEvolutionTypes( + pipelineDef + .getSink() + .getIncludedSchemaEvolutionTypes()), + pipelineDef.getRoute()); + + // Schema Operator ---(shuffled)---> Partitioning + stream = + partitioningTranslator.translateRegular( + stream, + parallelism, + parallelism, + schemaOperatorIDGenerator.generate(), + dataSink.getDataChangeEventHashFunctionProvider(parallelism)); + + // Partitioning ---> Sink ---> X + sinkTranslator.translate( + pipelineDef.getSink(), stream, dataSink, schemaOperatorIDGenerator.generate()); + } } private void addFrameworkJars() { diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DataSourceTranslator.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DataSourceTranslator.java index 90bbea73ee3..cfd332f3539 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DataSourceTranslator.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DataSourceTranslator.java @@ -41,12 +41,10 @@ public class DataSourceTranslator { public DataStreamSource translate( SourceDef sourceDef, + DataSource dataSource, StreamExecutionEnvironment env, Configuration pipelineConfig, int sourceParallelism) { - // Create data source - DataSource dataSource = createDataSource(sourceDef, env, pipelineConfig); - // Get source provider EventSourceProvider eventSourceProvider = dataSource.getEventSourceProvider(); if (eventSourceProvider instanceof FlinkSourceProvider) { @@ -78,8 +76,8 @@ public DataStreamSource translate( } } - private DataSource createDataSource( - SourceDef sourceDef, StreamExecutionEnvironment env, Configuration pipelineConfig) { + public DataSource createDataSource( + SourceDef sourceDef, Configuration pipelineConfig, StreamExecutionEnvironment env) { // Search the data source factory DataSourceFactory sourceFactory = FactoryDiscoveryUtils.getFactoryByIdentifier( @@ -87,13 +85,11 @@ private DataSource createDataSource( // Add source JAR to environment FactoryDiscoveryUtils.getJarPathByIdentifier(sourceFactory) .ifPresent(jar -> FlinkEnvironmentUtils.addJar(env, jar)); - DataSource dataSource = - sourceFactory.createDataSource( - new FactoryHelper.DefaultContext( - sourceDef.getConfig(), - pipelineConfig, - Thread.currentThread().getContextClassLoader())); - return dataSource; + return sourceFactory.createDataSource( + new FactoryHelper.DefaultContext( + sourceDef.getConfig(), + pipelineConfig, + Thread.currentThread().getContextClassLoader())); } private String generateDefaultSourceName(SourceDef sourceDef) { diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DistributedSchemaOperatorTranslator.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DistributedSchemaOperatorTranslator.java new file mode 100644 index 00000000000..d45876ef5bd --- /dev/null +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/DistributedSchemaOperatorTranslator.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.composer.flink.translator; + +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.composer.definition.RouteDef; +import org.apache.flink.cdc.runtime.operators.schema.distributed.SchemaOperator; +import org.apache.flink.cdc.runtime.operators.schema.distributed.SchemaOperatorFactory; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; +import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; + +/** Translator used to build {@link SchemaOperator} for schema event process. */ +public class DistributedSchemaOperatorTranslator { + private final String schemaOperatorUid; + private final Duration rpcTimeOut; + private final String timezone; + + public DistributedSchemaOperatorTranslator( + String schemaOperatorUid, Duration rpcTimeOut, String timezone) { + this.schemaOperatorUid = schemaOperatorUid; + this.rpcTimeOut = rpcTimeOut; + this.timezone = timezone; + } + + public DataStream translate( + DataStream input, + int parallelism, + MetadataApplier metadataApplier, + List routes) { + return addSchemaOperator(input, parallelism, metadataApplier, routes, timezone); + } + + private DataStream addSchemaOperator( + DataStream input, + int parallelism, + MetadataApplier metadataApplier, + List routes, + String timezone) { + List routingRules = new ArrayList<>(); + for (RouteDef route : routes) { + routingRules.add( + new RouteRule( + route.getSourceTable(), + route.getSinkTable(), + route.getReplaceSymbol().orElse(null))); + } + return input.transform( + "SchemaMapper", + new EventTypeInfo(), + new SchemaOperatorFactory( + metadataApplier, routingRules, rpcTimeOut, timezone)) + .uid(schemaOperatorUid) + .setParallelism(parallelism); + } +} diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/PartitioningTranslator.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/PartitioningTranslator.java index b4e0b34b8f7..1c7d9fe3273 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/PartitioningTranslator.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/PartitioningTranslator.java @@ -21,23 +21,25 @@ import org.apache.flink.cdc.common.event.DataChangeEvent; import org.apache.flink.cdc.common.event.Event; import org.apache.flink.cdc.common.function.HashFunctionProvider; +import org.apache.flink.cdc.runtime.partitioning.DistributedPrePartitionOperator; import org.apache.flink.cdc.runtime.partitioning.EventPartitioner; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; import org.apache.flink.cdc.runtime.partitioning.PartitioningEventKeySelector; import org.apache.flink.cdc.runtime.partitioning.PostPartitionProcessor; -import org.apache.flink.cdc.runtime.partitioning.PrePartitionOperator; 
+import org.apache.flink.cdc.runtime.partitioning.RegularPrePartitionOperator; import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo; import org.apache.flink.cdc.runtime.typeutils.PartitioningEventTypeInfo; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.streaming.api.datastream.DataStream; /** - * Translator used to build {@link PrePartitionOperator}, {@link EventPartitioner} and {@link + * Translator used to build {@link RegularPrePartitionOperator}, {@link EventPartitioner} and {@link * PostPartitionProcessor} which are responsible for events partition. */ @Internal public class PartitioningTranslator { - public DataStream translate( + public DataStream translateRegular( DataStream input, int upstreamParallelism, int downstreamParallelism, @@ -46,11 +48,25 @@ public DataStream translate( return input.transform( "PrePartition", new PartitioningEventTypeInfo(), - new PrePartitionOperator( + new RegularPrePartitionOperator( schemaOperatorID, downstreamParallelism, hashFunctionProvider)) .setParallelism(upstreamParallelism) .partitionCustom(new EventPartitioner(), new PartitioningEventKeySelector()) .map(new PostPartitionProcessor(), new EventTypeInfo()) .name("PostPartition"); } + + public DataStream translateDistributed( + DataStream input, + int upstreamParallelism, + int downstreamParallelism, + HashFunctionProvider hashFunctionProvider) { + return input.transform( + "Partitioning", + new PartitioningEventTypeInfo(), + new DistributedPrePartitionOperator( + downstreamParallelism, hashFunctionProvider)) + .setParallelism(upstreamParallelism) + .partitionCustom(new EventPartitioner(), new PartitioningEventKeySelector()); + } } diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/SchemaOperatorTranslator.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/SchemaOperatorTranslator.java index c5cadcd1e4a..b30448c4ec7 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/SchemaOperatorTranslator.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/SchemaOperatorTranslator.java @@ -23,8 +23,8 @@ import org.apache.flink.cdc.common.route.RouteRule; import org.apache.flink.cdc.common.sink.MetadataApplier; import org.apache.flink.cdc.composer.definition.RouteDef; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperatorFactory; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperatorFactory; import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java index 679ff9cc62b..1688c3da4a1 100644 --- a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java @@ -813,7 +813,7 @@ void testMergingWithRoute() throws Exception { Schema.newBuilder() .physicalColumn("id", DataTypes.BIGINT()) .physicalColumn("name", DataTypes.STRING()) - 
.physicalColumn("age", DataTypes.BIGINT()) + .physicalColumn("age", DataTypes.INT()) .physicalColumn("description", DataTypes.STRING()) .physicalColumn("last_name", DataTypes.STRING()) .physicalColumn("gender", DataTypes.STRING()) @@ -826,13 +826,13 @@ void testMergingWithRoute() throws Exception { "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[1, Alice, 18], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[2, Bob, 20], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[2, Bob, 20], after=[2, Bob, 30], op=UPDATE, meta=()}", - "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=LAST, existedColumnName=null}]}", - "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={age=BIGINT, id=BIGINT}, oldTypeMapping={age=INT, id=INT}}", + "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=AFTER, existedColumnName=age}]}", + "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={id=BIGINT}, oldTypeMapping={id=INT}}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[3, Charlie, 15, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[4, Donald, 25, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[4, Donald, 25, student], after=[], op=DELETE, meta=()}", - "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`last_name` STRING, position=LAST, existedColumnName=null}]}", - "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`gender` STRING, position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`last_name` STRING, position=AFTER, existedColumnName=description}]}", + "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`gender` STRING, position=AFTER, existedColumnName=last_name}]}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[5, null, 24, null, Eliza, null], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[6, Frank, 30, student, null, male], op=INSERT, meta=()}"); } @@ -1021,7 +1021,7 @@ void testTransformMergingWithRoute() throws Exception { Schema.newBuilder() .physicalColumn("id", DataTypes.BIGINT()) .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.BIGINT()) + .physicalColumn("age", DataTypes.INT()) .physicalColumn("last_name", DataTypes.STRING()) .physicalColumn("description", DataTypes.STRING()) .physicalColumn("gender", DataTypes.STRING()) @@ -1034,12 +1034,12 @@ void testTransformMergingWithRoute() throws Exception { "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[1, Alice, 18, last_name], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[2, Bob, 20, last_name], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[2, Bob, 20, last_name], after=[2, Bob, 30, last_name], op=UPDATE, 
meta=()}", - "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=LAST, existedColumnName=null}]}", - "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={age=BIGINT, id=BIGINT}, oldTypeMapping={age=INT, id=INT}}", + "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=AFTER, existedColumnName=last_name}]}", + "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={id=BIGINT}, oldTypeMapping={id=INT}}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[3, Charlie, 15, last_name, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[4, Donald, 25, last_name, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[4, Donald, 25, last_name, student], after=[], op=DELETE, meta=()}", - "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`gender` STRING, position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`gender` STRING, position=AFTER, existedColumnName=description}]}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[5, Eliza, 24, last_name, null, null], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[6, Frank, 30, last_name, student, male], op=INSERT, meta=()}"); } @@ -1271,9 +1271,11 @@ void testMergingDecimalWithVariousPrecisions(ValuesDataSink.SinkApi sinkApi) thr Stream.of( "CreateTableEvent{tableId={}, schema=columns={`id` INT,`name` STRING,`age` INT,`fav_num` TINYINT}, primaryKeys=id, options=()}", "DataChangeEvent{tableId={}, before=[], after=[1, Alice, 17, 1], op=INSERT, meta=()}", - "AlterColumnTypeEvent{tableId={}, typeMapping={fav_num=BIGINT}, oldTypeMapping={fav_num=TINYINT}}", + "AlterColumnTypeEvent{tableId={}, typeMapping={fav_num=SMALLINT}, oldTypeMapping={fav_num=TINYINT}}", "DataChangeEvent{tableId={}, before=[], after=[2, Alice, 17, 22], op=INSERT, meta=()}", + "AlterColumnTypeEvent{tableId={}, typeMapping={fav_num=INT}, oldTypeMapping={fav_num=SMALLINT}}", "DataChangeEvent{tableId={}, before=[], after=[3, Alice, 17, 3333], op=INSERT, meta=()}", + "AlterColumnTypeEvent{tableId={}, typeMapping={fav_num=BIGINT}, oldTypeMapping={fav_num=INT}}", "DataChangeEvent{tableId={}, before=[], after=[4, Alice, 17, 44444444], op=INSERT, meta=()}", "AlterColumnTypeEvent{tableId={}, typeMapping={fav_num=DECIMAL(19, 0)}, oldTypeMapping={fav_num=BIGINT}}", "DataChangeEvent{tableId={}, before=[], after=[5, Alice, 17, 555555555555555], op=INSERT, meta=()}", diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java index df47307d09a..3a75176a9d1 100644 --- a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java @@ -847,7 +847,7 @@ void testMergingWithRoute() throws Exception { Schema.newBuilder() .physicalColumn("id", DataTypes.BIGINT()) 
.physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.BIGINT()) + .physicalColumn("age", DataTypes.INT()) .physicalColumn("description", DataTypes.STRING()) .physicalColumn("last_name", DataTypes.STRING()) .physicalColumn("gender", DataTypes.STRING()) @@ -861,7 +861,7 @@ void testMergingWithRoute() throws Exception { "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[2, Bob, 20], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[2, Bob, 20], after=[2, Bob, 30], op=UPDATE, meta=()}", "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=LAST, existedColumnName=null}]}", - "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={age=BIGINT, id=BIGINT}, oldTypeMapping={age=INT, id=INT}}", + "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={id=BIGINT}, oldTypeMapping={id=INT}}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[3, Charlie, 15, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[4, Donald, 25, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[4, Donald, 25, student], after=[], op=DELETE, meta=()}", @@ -1054,7 +1054,7 @@ void testTransformMergingWithRoute() throws Exception { Schema.newBuilder() .physicalColumn("id", DataTypes.BIGINT()) .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.BIGINT()) + .physicalColumn("age", DataTypes.INT()) .physicalColumn("last_name", DataTypes.STRING()) .physicalColumn("description", DataTypes.STRING()) .physicalColumn("gender", DataTypes.STRING()) @@ -1068,7 +1068,7 @@ void testTransformMergingWithRoute() throws Exception { "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[2, Bob, 20, last_name], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[2, Bob, 20, last_name], after=[2, Bob, 30, last_name], op=UPDATE, meta=()}", "AddColumnEvent{tableId=default_namespace.default_schema.merged, addedColumns=[ColumnWithPosition{column=`description` STRING, position=LAST, existedColumnName=null}]}", - "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={age=BIGINT, id=BIGINT}, oldTypeMapping={age=INT, id=INT}}", + "AlterColumnTypeEvent{tableId=default_namespace.default_schema.merged, typeMapping={id=BIGINT}, oldTypeMapping={id=INT}}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[3, Charlie, 15, last_name, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[], after=[4, Donald, 25, last_name, student], op=INSERT, meta=()}", "DataChangeEvent{tableId=default_namespace.default_schema.merged, before=[4, Donald, 25, last_name, student], after=[], op=DELETE, meta=()}", diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkSchemaInferencingPipelineITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkSchemaInferencingPipelineITCase.java new file mode 100644 index 00000000000..d179ced3b10 --- /dev/null +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkSchemaInferencingPipelineITCase.java @@ -0,0 +1,722 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.composer.flink; + +import org.apache.flink.cdc.common.configuration.Configuration; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.PipelineOptions; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.composer.PipelineExecution; +import org.apache.flink.cdc.composer.definition.PipelineDef; +import org.apache.flink.cdc.composer.definition.SinkDef; +import org.apache.flink.cdc.composer.definition.SourceDef; +import org.apache.flink.cdc.connectors.stimps.factory.StimpsDataFactory; +import org.apache.flink.cdc.connectors.stimps.source.StimpsSourceOptions; +import org.apache.flink.cdc.connectors.values.ValuesDatabase; +import org.apache.flink.cdc.connectors.values.factory.ValuesDataFactory; +import org.apache.flink.cdc.connectors.values.sink.ValuesDataSink; +import org.apache.flink.cdc.connectors.values.sink.ValuesDataSinkOptions; +import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; +import org.apache.flink.test.junit5.MiniClusterExtension; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.Collections; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static org.apache.flink.configuration.CoreOptions.ALWAYS_PARENT_FIRST_LOADER_PATTERNS_ADDITIONAL; + +/** Integration test for {@link FlinkPipelineComposer} in schema inferencing cases. */ +class FlinkSchemaInferencingPipelineITCase { + + private static final int MAX_PARALLELISM = 4; + private static final int UPSTREAM_TABLE_COUNT = 4; + + // Always use parent-first classloader for CDC classes. + // The reason is that ValuesDatabase uses static field for holding data, we need to make sure + // the class is loaded by AppClassloader so that we can verify data in the test case. + private static final org.apache.flink.configuration.Configuration MINI_CLUSTER_CONFIG = + new org.apache.flink.configuration.Configuration(); + + static { + MINI_CLUSTER_CONFIG.set( + ALWAYS_PARENT_FIRST_LOADER_PATTERNS_ADDITIONAL, + Collections.singletonList("org.apache.flink.cdc")); + } + + /** + * Use {@link MiniClusterExtension} to reduce the overhead of restarting the MiniCluster for + * every test case. 
+ */ + @RegisterExtension + static final MiniClusterExtension MINI_CLUSTER_RESOURCE = + new MiniClusterExtension( + new MiniClusterResourceConfiguration.Builder() + .setNumberTaskManagers(1) + .setNumberSlotsPerTaskManager(MAX_PARALLELISM) + .setConfiguration(MINI_CLUSTER_CONFIG) + .build()); + + private final PrintStream standardOut = System.out; + private final ByteArrayOutputStream outCaptor = new ByteArrayOutputStream(); + + @BeforeEach + void init() { + // Take over STDOUT as we need to check the output of values sink + System.setOut(new PrintStream(outCaptor)); + // Initialize in-memory database + ValuesDatabase.clear(); + } + + @AfterEach + void cleanup() { + System.setOut(standardOut); + System.out.println( + "NOTICE: This is a fuzzy test. Please check if value sink prints expected events:"); + System.out.println("================================"); + System.out.print(outCaptor); + System.out.println("================================"); + } + + @ParameterizedTest + @EnumSource + void testSingleTableInMultiplePartitionsSourceInSingleParallelism( + ValuesDataSink.SinkApi sinkApi) throws Exception { + FlinkPipelineComposer composer = FlinkPipelineComposer.ofMiniCluster(); + + // Setup value source + Configuration sourceConfig = new Configuration(); + sourceConfig.set(StimpsSourceOptions.DISTRIBUTED_TABLES, true); + SourceDef sourceDef = + new SourceDef(StimpsDataFactory.IDENTIFIER, "STIMP Source", sourceConfig); + + // Setup value sink + Configuration sinkConfig = new Configuration(); + sinkConfig.set(ValuesDataSinkOptions.MATERIALIZED_IN_MEMORY, true); + sinkConfig.set(ValuesDataSinkOptions.SINK_API, sinkApi); + SinkDef sinkDef = new SinkDef(ValuesDataFactory.IDENTIFIER, "Value Sink", sinkConfig); + + // Setup pipeline + Configuration pipelineConfig = new Configuration(); + pipelineConfig.set(PipelineOptions.PIPELINE_PARALLELISM, 1); + pipelineConfig.set( + PipelineOptions.PIPELINE_SCHEMA_CHANGE_BEHAVIOR, SchemaChangeBehavior.LENIENT); + PipelineDef pipelineDef = + new PipelineDef( + sourceDef, + sinkDef, + Collections.emptyList(), + Collections.emptyList(), + Collections.emptyList(), + pipelineConfig); + + // Execute the pipeline + PipelineExecution execution = composer.compose(pipelineDef); + execution.execute(); + + // Validate generated downstream schema + for (int idx = 0; idx < UPSTREAM_TABLE_COUNT; idx++) { + Schema schema = + ValuesDatabase.getTableSchema( + TableId.tableId( + "default_namespace", "default_database", "table_" + idx)); + + // The order of result is determined. 
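 + // Note: each data type is expected twice below, once as the shared `col_*` column and once as the `subtask_0_*` variant (assumed to be the per-subtask column emitted by the test source; only subtask 0 exists at parallelism 1).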
+ Assertions.assertThat(schema.getColumns()) + .containsExactly( + Column.physicalColumn("id", DataTypes.STRING()), + Column.physicalColumn("col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("subtask_0_col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("subtask_0_col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn( + "subtask_0_col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn("col_inttype", DataTypes.INT()), + Column.physicalColumn("subtask_0_col_inttype", DataTypes.INT()), + Column.physicalColumn("col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("subtask_0_col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("subtask_0_col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("subtask_0_col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn( + "subtask_0_col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn("col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("subtask_0_col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn( + "subtask_0_col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn("col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("subtask_0_col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn( + "subtask_0_col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn("col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("subtask_0_col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "subtask_0_col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "subtask_0_col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "col_localzonedtimestamptype", DataTypes.TIMESTAMP_LTZ(9)), + Column.physicalColumn( + "subtask_0_col_localzonedtimestamptype", + DataTypes.TIMESTAMP_LTZ(9))); + } + + String outputStr = outCaptor.toString(); + + IntStream.range(0, 168) + .forEach( + seqNum -> + Assertions.assertThat(outputStr) + .contains(String.format("__$0$%d$__", seqNum))); + + // In single-parallelism mode, orderliness of output is determined. 
+ String[] dataLines = outputStr.split(System.lineSeparator()); + Assertions.assertThat(dataLines) + .containsExactly( + "CreateTableEvent{tableId=default_namespace.default_database.table_0, schema=columns={`id` STRING}, primaryKeys=id, partitionKeys=id, options=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$0$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$1$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$2$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$3$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$4$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$5$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$6$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$7$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$8$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$9$__], op=INSERT, meta=()}", + "CreateTableEvent{tableId=default_namespace.default_database.table_1, schema=columns={`id` STRING}, primaryKeys=id, partitionKeys=id, options=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$10$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$11$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$12$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$13$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$14$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$15$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$16$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$17$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$18$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$19$__], op=INSERT, meta=()}", + "CreateTableEvent{tableId=default_namespace.default_database.table_2, schema=columns={`id` STRING}, primaryKeys=id, partitionKeys=id, options=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$20$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$21$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$22$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$23$__], op=INSERT, meta=()}", + 
"DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$24$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$25$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$26$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$27$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$28$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$29$__], op=INSERT, meta=()}", + "CreateTableEvent{tableId=default_namespace.default_database.table_3, schema=columns={`id` STRING}, primaryKeys=id, partitionKeys=id, options=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$30$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$31$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$32$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$33$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$34$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$35$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$36$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$37$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$38$__], op=INSERT, meta=()}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$39$__], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$40$__, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$41$__, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$42$__, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$43$__, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_booleantype` BOOLEAN, position=LAST, 
existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$44$__, true, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$45$__, true, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$46$__, true, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_booleantype` BOOLEAN, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$47$__, true, true], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$48$__, true, true, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$49$__, true, true, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$50$__, true, true, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$51$__, true, true, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$52$__, true, true, 17, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$53$__, true, true, 17, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$54$__, true, true, 17, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, 
addedColumns=[ColumnWithPosition{column=`subtask_0_col_tinyinttype` TINYINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$55$__, true, true, 17, 17], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$56$__, true, true, 17, 17, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$57$__, true, true, 17, 17, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$58$__, true, true, 17, 17, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$59$__, true, true, 17, 17, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$60$__, true, true, 17, 17, 34, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$61$__, true, true, 17, 17, 34, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$62$__, true, true, 17, 17, 34, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_smallinttype` SMALLINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$63$__, true, true, 17, 17, 34, 34], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$64$__, true, true, 17, 17, 34, 34, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], 
after=[__$0$65$__, true, true, 17, 17, 34, 34, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$66$__, true, true, 17, 17, 34, 34, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$67$__, true, true, 17, 17, 34, 34, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$68$__, true, true, 17, 17, 34, 34, 68, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$69$__, true, true, 17, 17, 34, 34, 68, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$70$__, true, true, 17, 17, 34, 34, 68, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_inttype` INT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$71$__, true, true, 17, 17, 34, 34, 68, 68], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$72$__, true, true, 17, 17, 34, 34, 68, 68, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$73$__, true, true, 17, 17, 34, 34, 68, 68, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$74$__, true, true, 17, 17, 34, 34, 68, 68, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$75$__, true, true, 17, 17, 34, 34, 68, 68, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, 
addedColumns=[ColumnWithPosition{column=`subtask_0_col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$76$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$77$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$78$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_biginttype` BIGINT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$79$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$80$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$81$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$82$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$83$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$84$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$85$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0], op=INSERT, meta=()}", + 
"AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$86$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_floattype` FLOAT, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$87$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$88$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$89$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$90$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$91$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$92$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$93$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_doubletype` DOUBLE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$94$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_doubletype` DOUBLE, position=LAST, 
existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$95$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$96$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$97$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$98$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$99$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$100$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$101$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$102$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_decimaltype` DECIMAL(17, 11), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$103$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000], 
op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$104$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$105$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$106$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$107$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$108$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$109$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$110$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_chartype` CHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$111$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, 
addedColumns=[ColumnWithPosition{column=`col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$112$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$113$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$114$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$115$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$116$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$117$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$118$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varchartype` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$119$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob], op=INSERT, meta=()}", + 
"AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$120$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$121$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$122$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$123$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$124$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$125$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$126$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_binarytype` BINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$127$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 
272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$128$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$129$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$130$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$131$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$132$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$133$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$134$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==], 
op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_varbinarytype` VARBINARY(17), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$135$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$136$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$137$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$138$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$139$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$140$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$141$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000], op=INSERT, meta=()}", + 
"AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$142$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timetype` TIME(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$143$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$144$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$145$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$146$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$147$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$148$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 
1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$149$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$150$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_timestamptype` TIMESTAMP(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$151$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$152$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$153$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$154$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 
64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$155$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$156$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$157$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$158$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_zonedtimestamptype` TIMESTAMP(9) WITH TIME ZONE, position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$159$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + 
"DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$160$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$161$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$162$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$163$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_0, addedColumns=[ColumnWithPosition{column=`subtask_0_col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_0, before=[], after=[__$0$164$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_1, addedColumns=[ColumnWithPosition{column=`subtask_0_col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_1, before=[], after=[__$0$165$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, 
Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_2, addedColumns=[ColumnWithPosition{column=`subtask_0_col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_2, before=[], after=[__$0$166$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00, 2019-12-31T18:00], op=INSERT, meta=()}", + "AddColumnEvent{tableId=default_namespace.default_database.table_3, addedColumns=[ColumnWithPosition{column=`subtask_0_col_localzonedtimestamptype` TIMESTAMP_LTZ(9), position=LAST, existedColumnName=null}]}", + "DataChangeEvent{tableId=default_namespace.default_database.table_3, before=[], after=[__$0$167$__, true, true, 17, 17, 34, 34, 68, 68, 136, 136, 272.0, 272.0, 544.0, 544.0, 1088.00000000000, 1088.00000000000, Alice, Alice, Bob, Bob, Q2ljYWRh, Q2ljYWRh, RGVycmlkYQ==, RGVycmlkYQ==, 64800000, 64800000, 2020-07-17T18:00, 2020-07-17T18:00, 1970-01-05T05:20:00.000123456+08:00, 1970-01-05T05:20:00.000123456+08:00, 2019-12-31T18:00, 2019-12-31T18:00], op=INSERT, meta=()}"); + } + + @ParameterizedTest + @EnumSource + void testSingleTableInMultiplePartitionsSourceInMultipleParallelism( + ValuesDataSink.SinkApi sinkApi) throws Exception { + FlinkPipelineComposer composer = FlinkPipelineComposer.ofMiniCluster(); + + // Setup value source + Configuration sourceConfig = new Configuration(); + sourceConfig.set(StimpsSourceOptions.DISTRIBUTED_TABLES, true); + sourceConfig.set(StimpsSourceOptions.TABLE_COUNT, UPSTREAM_TABLE_COUNT); + SourceDef sourceDef = + new SourceDef(StimpsDataFactory.IDENTIFIER, "STIMP Source", sourceConfig); + + // Setup value sink + Configuration sinkConfig = new Configuration(); + sinkConfig.set(ValuesDataSinkOptions.MATERIALIZED_IN_MEMORY, true); + sinkConfig.set(ValuesDataSinkOptions.SINK_API, sinkApi); + SinkDef sinkDef = new SinkDef(ValuesDataFactory.IDENTIFIER, "Value Sink", sinkConfig); + + // Setup pipeline + Configuration pipelineConfig = new Configuration(); + pipelineConfig.set(PipelineOptions.PIPELINE_PARALLELISM, MAX_PARALLELISM); + pipelineConfig.set( + PipelineOptions.PIPELINE_SCHEMA_CHANGE_BEHAVIOR, SchemaChangeBehavior.LENIENT); + PipelineDef pipelineDef = + new PipelineDef( + sourceDef, + sinkDef, + Collections.emptyList(), + Collections.emptyList(), + Collections.emptyList(), + pipelineConfig); + + // Execute the pipeline + PipelineExecution execution = composer.compose(pipelineDef); + + execution.execute(); + + // Validate generated downstream schema + for (int idx = 0; idx < UPSTREAM_TABLE_COUNT; idx++) { + Schema schema = + ValuesDatabase.getTableSchema( + TableId.tableId( + "default_namespace", "default_database", "table_" + idx)); + + // The order of result schema is uncertain. 
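+                // Each parallel subtask appends its own `subtask_<n>_col_*` columns concurrently
+                // under LENIENT schema evolution, so the relative column order depends on event
+                // arrival order; only the column set is asserted below, not its ordering.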
+ Assertions.assertThat(schema.getColumns()) + .containsExactlyInAnyOrder( + Column.physicalColumn("id", DataTypes.STRING()), + Column.physicalColumn("col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("subtask_0_col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("subtask_1_col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("subtask_2_col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("subtask_3_col_booleantype", DataTypes.BOOLEAN()), + Column.physicalColumn("col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("subtask_0_col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("subtask_1_col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("subtask_2_col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("subtask_3_col_tinyinttype", DataTypes.TINYINT()), + Column.physicalColumn("col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn( + "subtask_0_col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn( + "subtask_1_col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn( + "subtask_2_col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn( + "subtask_3_col_smallinttype", DataTypes.SMALLINT()), + Column.physicalColumn("col_inttype", DataTypes.INT()), + Column.physicalColumn("subtask_0_col_inttype", DataTypes.INT()), + Column.physicalColumn("subtask_1_col_inttype", DataTypes.INT()), + Column.physicalColumn("subtask_2_col_inttype", DataTypes.INT()), + Column.physicalColumn("subtask_3_col_inttype", DataTypes.INT()), + Column.physicalColumn("col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("subtask_0_col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("subtask_1_col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("subtask_2_col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("subtask_3_col_biginttype", DataTypes.BIGINT()), + Column.physicalColumn("col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn( + "subtask_0_col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn( + "subtask_1_col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn( + "subtask_2_col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn( + "subtask_3_col_decimaltype", DataTypes.DECIMAL(17, 11)), + Column.physicalColumn("col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("subtask_0_col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("subtask_1_col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("subtask_2_col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("subtask_3_col_floattype", DataTypes.FLOAT()), + Column.physicalColumn("col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("subtask_0_col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("subtask_1_col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("subtask_2_col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("subtask_3_col_doubletype", DataTypes.DOUBLE()), + Column.physicalColumn("col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("subtask_0_col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("subtask_1_col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("subtask_2_col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("subtask_3_col_chartype", DataTypes.CHAR(17)), + Column.physicalColumn("col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn( + "subtask_0_col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn( + 
"subtask_1_col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn( + "subtask_2_col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn( + "subtask_3_col_varchartype", DataTypes.VARCHAR(17)), + Column.physicalColumn("col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("subtask_0_col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("subtask_1_col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("subtask_2_col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("subtask_3_col_binarytype", DataTypes.BINARY(17)), + Column.physicalColumn("col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn( + "subtask_0_col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn( + "subtask_1_col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn( + "subtask_2_col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn( + "subtask_3_col_varbinarytype", DataTypes.VARBINARY(17)), + Column.physicalColumn("col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("subtask_0_col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("subtask_1_col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("subtask_2_col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("subtask_3_col_timetype", DataTypes.TIME(9)), + Column.physicalColumn("col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "subtask_0_col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "subtask_1_col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "subtask_2_col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "subtask_3_col_timestamptype", DataTypes.TIMESTAMP(9)), + Column.physicalColumn( + "col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "subtask_0_col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "subtask_1_col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "subtask_2_col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "subtask_3_col_zonedtimestamptype", DataTypes.TIMESTAMP_TZ(9)), + Column.physicalColumn( + "col_localzonedtimestamptype", DataTypes.TIMESTAMP_LTZ(9)), + Column.physicalColumn( + "subtask_0_col_localzonedtimestamptype", + DataTypes.TIMESTAMP_LTZ(9)), + Column.physicalColumn( + "subtask_1_col_localzonedtimestamptype", + DataTypes.TIMESTAMP_LTZ(9)), + Column.physicalColumn( + "subtask_2_col_localzonedtimestamptype", + DataTypes.TIMESTAMP_LTZ(9)), + Column.physicalColumn( + "subtask_3_col_localzonedtimestamptype", + DataTypes.TIMESTAMP_LTZ(9))); + } + + String outputStr = outCaptor.toString(); + IntStream.range(0, MAX_PARALLELISM) + .forEach( + subTaskId -> + IntStream.range(0, 168) + .forEach( + seqNum -> + Assertions.assertThat(outputStr) + .contains( + String.format( + "__$%d$%d$__", + subTaskId, + seqNum)))); + + String[] dataLines = outputStr.split(System.lineSeparator()); + String[] expectedTokens = { + "true", + "17", + "34", + "68", + "136", + "272.0", + "544.0", + "1088.00000000000", + "Alice", + "Bob", + "Q2ljYWRh", + "RGVycmlkYQ==", + "64800000", + "2019-12-31T18:00", + "2020-07-17T18:00", + "1970-01-05T05:20:00.000123456+08:00" + }; + + Stream.of(expectedTokens) + .forEach( + token -> + Assertions.assertThat( + Stream.of(dataLines) + .filter(line -> line.contains(token)) + .count()) + .as("Checking presence of %s", token) + .isGreaterThanOrEqualTo( + UPSTREAM_TABLE_COUNT * MAX_PARALLELISM)); + } +} diff --git 
a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml index 79a5d562bfe..516eb1cb70e 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml @@ -34,41 +34,13 @@ limitations under the License. http://maven.apache.org - UTF-8 8.12.1 - 1.19.1 - 4.0 - 2.13.2 - --add-opens=java.base/java.util=ALL-UNNAMED - 1.16.0 3.0.1-1.17 4.5.13 - 5.7.1 - 3.18.1 - 4.13.2 - 1.7.32 - 1.10.2 2.0.2 - - org.apache.flink - flink-streaming-java - ${flink.version} - provided - - - org.apache.flink - flink-table-api-java-bridge - ${flink.version} - provided - - - org.apache.flink - flink-connector-base - ${flink.version} - org.apache.flink flink-json @@ -142,39 +114,6 @@ limitations under the License. ${elasticsearch.version} - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - - org.junit.jupiter - junit-jupiter-api - ${junit.jupiter.version} - test - - - org.junit.jupiter - junit-jupiter-engine - ${junit.jupiter.version} - test - - - org.assertj - assertj-core - ${assertj.version} - test - - - junit - junit - ${junit.version} - test - - org.apache.flink @@ -188,12 +127,6 @@ limitations under the License. ${flink.version} test - - org.junit.platform - junit-platform-launcher - ${junit.platform.version} - test - org.apache.flink flink-cdc-composer @@ -222,7 +155,6 @@ limitations under the License. org.apache.maven.plugins maven-shade-plugin - 3.2.4 package @@ -248,12 +180,7 @@ limitations under the License. org.apache.maven.plugins maven-compiler-plugin - 3.8.1 - - 1.8 - 1.8 - - + \ No newline at end of file diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSource.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSource.java index d1dc487c04e..6a2873173ce 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSource.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSource.java @@ -67,4 +67,11 @@ public MetadataAccessor getMetadataAccessor() { public MySqlSourceConfig getSourceConfig() { return sourceConfig; } + + @Override + public boolean canContainDistributedTables() { + // During incremental stage, MySQL never emits schema change events on different partitions + // (since it has one Binlog stream only.) 
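+        // Sources that may observe the same table from multiple partitions (for instance the
+        // "stimps" testing source introduced in this change) return true here instead.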
+ return false; + } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/pom.xml new file mode 100644 index 00000000000..c33ef825373 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/pom.xml @@ -0,0 +1,48 @@ + + + + + flink-cdc-pipeline-connectors + org.apache.flink + ${revision} + + 4.0.0 + + flink-cdc-pipeline-connector-stimps + + + + + + + org.apache.flink + flink-runtime + ${flink.version} + provided + + + + org.apache.flink + flink-clients + ${flink.version} + test + + + \ No newline at end of file diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/factory/StimpsDataFactory.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/factory/StimpsDataFactory.java new file mode 100644 index 00000000000..d5c005154fc --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/factory/StimpsDataFactory.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.stimps.factory; + +import org.apache.flink.cdc.common.annotation.Internal; +import org.apache.flink.cdc.common.configuration.ConfigOption; +import org.apache.flink.cdc.common.factories.DataSourceFactory; +import org.apache.flink.cdc.common.factories.Factory; +import org.apache.flink.cdc.common.source.DataSource; +import org.apache.flink.cdc.connectors.stimps.source.StimpsDataSource; + +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableSet; + +import java.util.HashSet; +import java.util.Set; + +import static org.apache.flink.cdc.connectors.stimps.source.StimpsSourceOptions.DISTRIBUTED_TABLES; +import static org.apache.flink.cdc.connectors.stimps.source.StimpsSourceOptions.TABLE_COUNT; + +/** A source {@link Factory} to create {@link StimpsDataSource}. 
*/ +@Internal +public class StimpsDataFactory implements DataSourceFactory { + + public static final String IDENTIFIER = "stimps"; + + @Override + public DataSource createDataSource(Context context) { + int tableCount = context.getFactoryConfiguration().get(TABLE_COUNT); + boolean distributedTables = context.getFactoryConfiguration().get(DISTRIBUTED_TABLES); + return new StimpsDataSource(tableCount, distributedTables); + } + + @Override + public String identifier() { + return IDENTIFIER; + } + + @Override + public Set> requiredOptions() { + return new HashSet<>(); + } + + @Override + public Set> optionalOptions() { + return ImmutableSet.of(TABLE_COUNT, DISTRIBUTED_TABLES); + } +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsDataSource.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsDataSource.java new file mode 100644 index 00000000000..4a8b4ef05ef --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsDataSource.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.stimps.source; + +import org.apache.flink.cdc.common.annotation.Internal; +import org.apache.flink.cdc.common.source.DataSource; +import org.apache.flink.cdc.common.source.EventSourceProvider; +import org.apache.flink.cdc.common.source.FlinkSourceFunctionProvider; +import org.apache.flink.cdc.common.source.MetadataAccessor; + +/** + * A {@link DataSource} for "stimps" connector that tests single-table-in-multi-partition scenario. 
+ */ +@Internal +public class StimpsDataSource implements DataSource { + + private final int tableCount; + private final boolean distributedTables; + + public StimpsDataSource(int tableCount, boolean distributedTables) { + this.tableCount = tableCount; + this.distributedTables = distributedTables; + } + + @Override + public EventSourceProvider getEventSourceProvider() { + return FlinkSourceFunctionProvider.of( + new StimpsSourceFunction(tableCount, distributedTables)); + } + + @Override + public MetadataAccessor getMetadataAccessor() { + throw new UnsupportedOperationException("Stimps doesn't need a metadata accessor!"); + } + + @Override + public boolean canContainDistributedTables() { + return distributedTables; + } +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceFunction.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceFunction.java new file mode 100644 index 00000000000..4b9b270c7b8 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceFunction.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.stimps.source; + +import org.apache.flink.cdc.common.data.DecimalData; +import org.apache.flink.cdc.common.data.LocalZonedTimestampData; +import org.apache.flink.cdc.common.data.TimestampData; +import org.apache.flink.cdc.common.data.ZonedTimestampData; +import org.apache.flink.cdc.common.data.binary.BinaryRecordData; +import org.apache.flink.cdc.common.data.binary.BinaryStringData; +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.types.DataType; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** Source function for "STIMPS" testing source. */ +public class StimpsSourceFunction extends RichParallelSourceFunction { + + private static final Logger LOG = LoggerFactory.getLogger(StimpsSourceFunction.class); + + private int subTaskId; + private int parallelism; + + private final int numOfTables; + private final boolean distributedTables; + private transient Map dummyDataTypes; + private List tables; + private transient int iotaCounter; + + public StimpsSourceFunction(int numOfTables, boolean distributedTables) { + this.numOfTables = numOfTables; + this.distributedTables = distributedTables; + } + + @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + iotaCounter = 0; + subTaskId = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + parallelism = getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks(); + if (distributedTables) { + tables = + IntStream.range(0, numOfTables) + .mapToObj( + idx -> + TableId.tableId( + "default_namespace", + "default_database", + "table_" + idx)) + .collect(Collectors.toList()); + } else { + tables = + IntStream.range(0, numOfTables) + .mapToObj( + idx -> + TableId.tableId( + "default_namespace_subtask_" + subTaskId, + "default_database", + "table_" + idx)) + .collect(Collectors.toList()); + } + dummyDataTypes = new LinkedHashMap<>(); + dummyDataTypes.put(DataTypes.BOOLEAN(), true); + dummyDataTypes.put(DataTypes.TINYINT(), (byte) 17); + dummyDataTypes.put(DataTypes.SMALLINT(), (short) 34); + dummyDataTypes.put(DataTypes.INT(), (int) 68); + dummyDataTypes.put(DataTypes.BIGINT(), (long) 136); + dummyDataTypes.put(DataTypes.FLOAT(), (float) 272.0); + dummyDataTypes.put(DataTypes.DOUBLE(), (double) 544.0); + dummyDataTypes.put( + DataTypes.DECIMAL(17, 11), + DecimalData.fromBigDecimal(new BigDecimal("1088.000"), 17, 11)); + 
dummyDataTypes.put(DataTypes.CHAR(17), BinaryStringData.fromString("Alice")); + dummyDataTypes.put(DataTypes.VARCHAR(17), BinaryStringData.fromString("Bob")); + dummyDataTypes.put(DataTypes.BINARY(17), "Cicada".getBytes()); + dummyDataTypes.put(DataTypes.VARBINARY(17), "Derrida".getBytes()); + dummyDataTypes.put(DataTypes.TIME(9), 64800000); + dummyDataTypes.put( + DataTypes.TIMESTAMP(9), + TimestampData.fromTimestamp(Timestamp.valueOf("2020-07-17 18:00:00"))); + dummyDataTypes.put( + DataTypes.TIMESTAMP_TZ(9), + ZonedTimestampData.of(364800000, 123456, "Asia/Shanghai")); + dummyDataTypes.put( + DataTypes.TIMESTAMP_LTZ(9), + LocalZonedTimestampData.fromInstant(toInstant("2019-12-31 18:00:00"))); + } + + // Generates statically incrementing data, could be used for data integrity verification. + private BinaryStringData iota() { + return BinaryStringData.fromString(String.format("__$%d$%d$__", subTaskId, iotaCounter++)); + } + + private void sendFromTables(Consumer tableIdConsumer) { + if (parallelism > 1) { + // Inject a little randomness in multi-parallelism mode + Collections.shuffle(tables); + } + tables.forEach(tableIdConsumer); + } + + @Override + public void run(SourceContext context) throws InterruptedException { + Schema initialSchema = + Schema.newBuilder() + .physicalColumn("id", DataTypes.STRING()) + .primaryKey("id") + .partitionKey("id") + .build(); + + Map headSchemaMap = new HashMap<>(); + + sendFromTables( + tableId -> { + for (int i = 0; i < 10; i++) { + CreateTableEvent createTableEvent = + new CreateTableEvent(tableId, initialSchema); + headSchemaMap.compute( + tableId, + (tbl, schema) -> + SchemaUtils.applySchemaChangeEvent( + schema, createTableEvent)); + collect(context, createTableEvent); + collect( + context, + DataChangeEvent.insertEvent( + tableId, generateBinRec(headSchemaMap.get(tableId)))); + } + }); + + List fullTypes = new ArrayList<>(dummyDataTypes.keySet()); + if (parallelism > 1) { + // Inject randomness in multi-parallelism mode + Collections.shuffle(fullTypes); + } + fullTypes.forEach( + colType -> { + sendFromTables( + tableId -> { + AddColumnEvent addColumnEvent = + new AddColumnEvent( + tableId, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "col_" + + colType.getClass() + .getSimpleName() + .toLowerCase(), + colType)))); + + headSchemaMap.compute( + tableId, + (tbl, schema) -> + SchemaUtils.applySchemaChangeEvent( + schema, addColumnEvent)); + collect(context, addColumnEvent); + collect( + context, + DataChangeEvent.insertEvent( + tableId, + generateBinRec(headSchemaMap.get(tableId)))); + }); + + sendFromTables( + tableId -> { + AddColumnEvent addColumnEvent = + new AddColumnEvent( + tableId, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "subtask_" + + subTaskId + + "_col_" + + colType.getClass() + .getSimpleName() + .toLowerCase(), + colType)))); + + headSchemaMap.compute( + tableId, + (tbl, schema) -> + SchemaUtils.applySchemaChangeEvent( + schema, addColumnEvent)); + collect(context, addColumnEvent); + collect( + context, + DataChangeEvent.insertEvent( + tableId, + generateBinRec(headSchemaMap.get(tableId)))); + }); + }); + + if (parallelism > 1) { + // To allow test running correctly, we need to wait for downstream schema evolutions + // to finish before closing any subTask. 
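+            // A fixed 10-second sleep is only a best-effort wait here; closing a subTask too
+            // early could abort schema change requests that other subTasks are still waiting on.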
+ Thread.sleep(10000); + } + } + + @Override + public void cancel() { + // Do nothing + } + + private BinaryRecordData generateBinRec(Schema schema) { + BinaryRecordDataGenerator generator = + new BinaryRecordDataGenerator(schema.getColumnDataTypes().toArray(new DataType[0])); + + int arity = schema.getColumnDataTypes().size(); + List rowTypes = schema.getColumnDataTypes(); + Object[] rowObjects = new Object[arity]; + + for (int i = 0; i < arity; i++) { + DataType type = rowTypes.get(i); + if (Objects.equals(type, DataTypes.STRING())) { + rowObjects[i] = iota(); + } else { + rowObjects[i] = dummyDataTypes.get(type); + } + } + return generator.generate(rowObjects); + } + + private void collect(SourceContext sourceContext, Event event) { + LOG.info("{}> Emitting event {}", subTaskId, event); + sourceContext.collect(event); + } + + private Instant toInstant(String ts) { + return Timestamp.valueOf(ts).toLocalDateTime().atZone(ZoneId.of("UTC")).toInstant(); + } +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceOptions.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceOptions.java new file mode 100644 index 00000000000..4dc2f23d50e --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/java/org/apache/flink/cdc/connectors/stimps/source/StimpsSourceOptions.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.stimps.source; + +import org.apache.flink.cdc.common.configuration.ConfigOption; +import org.apache.flink.cdc.common.configuration.ConfigOptions; + +/** Configurations for {@link StimpsDataSource}. */ +public class StimpsSourceOptions { + public static final ConfigOption TABLE_COUNT = + ConfigOptions.key("table-count") + .intType() + .defaultValue(4) + .withDescription("Number of parallelized tables in one single parallelism."); + + public static final ConfigOption DISTRIBUTED_TABLES = + ConfigOptions.key("distributed-tables") + .booleanType() + .defaultValue(false) + .withDescription( + "Whether this source should emit distributed tables that might present in multiple partitions. 
Defaults to false."); +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/resources/META-INF/services/org.apache.flink.cdc.common.factories.Factory b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/resources/META-INF/services/org.apache.flink.cdc.common.factories.Factory new file mode 100644 index 00000000000..c6ca06577dd --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-stimps/src/main/resources/META-INF/services/org.apache.flink.cdc.common.factories.Factory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.flink.cdc.connectors.stimps.factory.StimpsDataFactory diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-values/src/main/java/org/apache/flink/cdc/connectors/values/sink/ValuesDataSinkHelper.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-values/src/main/java/org/apache/flink/cdc/connectors/values/sink/ValuesDataSinkHelper.java index ba52c1fd87e..1d93899f9cb 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-values/src/main/java/org/apache/flink/cdc/connectors/values/sink/ValuesDataSinkHelper.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-values/src/main/java/org/apache/flink/cdc/connectors/values/sink/ValuesDataSinkHelper.java @@ -22,8 +22,11 @@ import org.apache.flink.cdc.common.event.Event; import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.shaded.guava31.com.google.common.io.BaseEncoding; + import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; /** A helper class for {@link ValuesDataSink} to process {@link Event}. */ public class ValuesDataSinkHelper { @@ -61,6 +64,15 @@ private static List getFields( for (RecordData.FieldGetter fieldGetter : fieldGetters) { fields.add(fieldGetter.getFieldOrNull(recordData)); } - return fields; + return fields.stream() + .map( + o -> { + if (o instanceof byte[]) { + return BaseEncoding.base64().encode((byte[]) o); + } else { + return o; + } + }) + .collect(Collectors.toList()); } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/pom.xml index 9d0f9769b5a..ce022ee4016 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/pom.xml @@ -35,6 +35,7 @@ limitations under the License. 
flink-cdc-pipeline-connector-kafka flink-cdc-pipeline-connector-paimon flink-cdc-pipeline-connector-elasticsearch + flink-cdc-pipeline-connector-stimps diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/MysqlE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/MysqlE2eITCase.java index a8c7a8c5f67..71e25b07722 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/MysqlE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/MysqlE2eITCase.java @@ -399,17 +399,9 @@ public void testDanglingDropTableEventInBinlog() throws Exception { Path mysqlDriverJar = TestUtils.getResource("mysql-driver.jar"); submitPipelineJob(pipelineJob, mysqlCdcJar, valuesCdcJar, mysqlDriverJar); waitUntilJobRunning(Duration.ofSeconds(30)); - LOG.info("Pipeline job is running"); - waitUntilSpecificEvent( - String.format( - "Table %s.live_fast received SchemaChangeEvent DropTableEvent{tableId=%s.live_fast} and start to be blocked.", - mysqlInventoryDatabase.getDatabaseName(), - mysqlInventoryDatabase.getDatabaseName())); - - waitUntilSpecificEvent( - String.format( - "Schema change event DropTableEvent{tableId=%s.live_fast} has been handled in another subTask already.", - mysqlInventoryDatabase.getDatabaseName())); + validateResult( + "CreateTableEvent{tableId=%s.customers, schema=columns={`id` INT NOT NULL,`name` VARCHAR(255) NOT NULL 'flink',`address` VARCHAR(1024),`phone_number` VARCHAR(512)}, primaryKeys=id, options=()}", + "CreateTableEvent{tableId=%s.products, schema=columns={`id` INT NOT NULL,`name` VARCHAR(255) NOT NULL 'flink',`description` VARCHAR(512),`weight` FLOAT,`enum_c` STRING 'red',`json_c` STRING,`point_c` STRING}, primaryKeys=id, options=()}"); } private void validateResult(String... 
expectedEvents) throws Exception { diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/RouteE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/RouteE2eITCase.java index c9b26c7818b..dd3119cd33e 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/RouteE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/RouteE2eITCase.java @@ -307,9 +307,9 @@ public void testMergeTableRoute() throws Exception { generateSchemaChanges(); validateResult( - "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=AFTER, existedColumnName=VERSION}]}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10001, 12, Derrida], op=INSERT, meta=()}", - "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=AFTER, existedColumnName=NAME}]}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10002, null, null, 15], op=INSERT, meta=()}", "AlterColumnTypeEvent{tableId=%s.ALL, typeMapping={VERSION=STRING}, oldTypeMapping={VERSION=VARCHAR(17)}}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10003, null, null, Fluorite], op=INSERT, meta=()}", @@ -401,9 +401,9 @@ public void testPartialRoute() throws Exception { generateSchemaChanges(); validateResult( - "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=AFTER, existedColumnName=VERSION}]}", "DataChangeEvent{tableId=NEW_%s.ALPHABET, before=[], after=[10001, 12, Derrida], op=INSERT, meta=()}", - "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=AFTER, existedColumnName=NAME}]}", "DataChangeEvent{tableId=NEW_%s.ALPHABET, before=[], after=[10002, null, null, 15], op=INSERT, meta=()}", "AlterColumnTypeEvent{tableId=%s.TABLEGAMMA, typeMapping={VERSION=VARCHAR(19)}, oldTypeMapping={VERSION=VARCHAR(17)}}", "RenameColumnEvent{tableId=%s.TABLEGAMMA, nameMapping={VERSION=VERSION_EX}}", @@ -506,10 +506,10 @@ public void testMultipleRoute() throws Exception { generateSchemaChanges(); validateResult( - "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=AFTER, existedColumnName=VERSION}]}", "DataChangeEvent{tableId=NEW_%s.ALPHABET, before=[], after=[10001, 12, Derrida], op=INSERT, meta=()}", - "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=LAST, existedColumnName=null}]}", - "AddColumnEvent{tableId=NEW_%s.BETAGAMM, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), 
position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=NEW_%s.ALPHABET, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=AFTER, existedColumnName=NAME}]}", + "AddColumnEvent{tableId=NEW_%s.BETAGAMM, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=AFTER, existedColumnName=VERSION}]}", "DataChangeEvent{tableId=NEW_%s.ALPHABET, before=[], after=[10002, null, null, 15], op=INSERT, meta=()}", "DataChangeEvent{tableId=NEW_%s.BETAGAMM, before=[], after=[10002, null, 15], op=INSERT, meta=()}", "AlterColumnTypeEvent{tableId=NEW_%s.BETAGAMM, typeMapping={VERSION=STRING}, oldTypeMapping={VERSION=VARCHAR(17)}}", @@ -712,9 +712,9 @@ public void testMergeTableRouteWithTransform() throws Exception { generateSchemaChanges(); validateResult( - "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`NAME` VARCHAR(17), position=AFTER, existedColumnName=EXTRAS}]}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10001, 12, extras, Derrida], op=INSERT, meta=()}", - "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=LAST, existedColumnName=null}]}", + "AddColumnEvent{tableId=%s.ALL, addedColumns=[ColumnWithPosition{column=`VERSION_EX` VARCHAR(17), position=AFTER, existedColumnName=NAME}]}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10002, null, extras, null, 15], op=INSERT, meta=()}", "AlterColumnTypeEvent{tableId=%s.ALL, typeMapping={VERSION=STRING}, oldTypeMapping={VERSION=VARCHAR(17)}}", "DataChangeEvent{tableId=%s.ALL, before=[], after=[10003, null, extras, null, Fluorite], op=INSERT, meta=()}", @@ -890,6 +890,7 @@ public void testExtremeMergeTableRoute() throws Exception { "> DataChangeEvent{tableId=%s.TABLE%d, before=[], after=[%d, No.%d], op=INSERT, meta=()}", databaseName, i, i, i)) .toArray(String[]::new)); + extremeRouteTestDatabase.dropDatabase(); } private void validateResult(String... 
expectedEvents) throws Exception { @@ -919,6 +920,17 @@ private void waitUntilSpecificEvent(long timeout, String event) throws Exception Thread.sleep(1000); } if (!result) { + System.out.println(); + System.out.println(); + System.out.println("================"); + System.out.println(" JM Log: "); + System.out.println(jobManagerConsumer.toUtf8String()); + System.out.println("================"); + System.out.println(" TM Log: "); + System.out.println(taskManagerConsumer.toUtf8String()); + System.out.println("================"); + System.out.println(); + System.out.println(); throw new TimeoutException( "failed to get specific event: " + event diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolveE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolveE2eITCase.java index 0b514c3b9c7..29be6b00e48 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolveE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolveE2eITCase.java @@ -113,10 +113,14 @@ public void testSchemaEvolveWithIncompatibleChanges() throws Exception { true, false, false, - Collections.emptyList(), Arrays.asList( - "java.lang.IllegalStateException: Incompatible types found for column `age`: \"INT\" and \"DOUBLE\"", - "org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy")); + "AddColumnEvent{tableId=%s.merged, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1012, Eve, 17, 0], op=INSERT, meta=()}", + "AlterColumnTypeEvent{tableId=%s.merged, typeMapping={age=STRING}, oldTypeMapping={age=INT}}", + "AddColumnEvent{tableId=%s.merged, addedColumns=[ColumnWithPosition{column=`precise_age` DOUBLE, position=AFTER, existedColumnName=gender}]}", + "AddColumnEvent{tableId=%s.merged, addedColumns=[ColumnWithPosition{column=`biological_sex` TINYINT, position=AFTER, existedColumnName=precise_age}]}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1013, Fiona, null, null, 16.0, null], op=INSERT, meta=()}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1014, Gem, null, null, 17.0, null], op=INSERT, meta=()}")); } @Test @@ -128,8 +132,8 @@ public void testSchemaEvolveWithException() throws Exception { false, Collections.emptyList(), Arrays.asList( - "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} to table %s.members. 
Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", - "UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Caused by: org.apache.flink.util.FlinkRuntimeException: Failed to apply schema change event.", + "Caused by: org.apache.flink.cdc.common.exceptions.UnsupportedSchemaChangeEventException", "org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy")); } @@ -143,10 +147,16 @@ public void testSchemaTryEvolveWithException() throws Exception { Arrays.asList( // Add column never succeeded, so age column will not appear. "DataChangeEvent{tableId=%s.members, before=[], after=[1012, Eve, 17], op=INSERT, meta=()}", - "DataChangeEvent{tableId=%s.members, before=[], after=[1013, Fiona, null], op=INSERT, meta=()}"), + "DataChangeEvent{tableId=%s.members, before=[], after=[1013, Fiona, null], op=INSERT, meta=()}", + "DataChangeEvent{tableId=%s.members, before=[], after=[1014, Gem, null], op=INSERT, meta=()}"), Arrays.asList( - "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} to table %s.members. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", - "UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}")); + "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change AlterColumnTypeEvent{tableId=%s.members, typeMapping={age=DOUBLE}, oldTypeMapping={age=INT}}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AlterColumnTypeEvent{tableId=%s.members, typeMapping={age=DOUBLE}, oldTypeMapping={age=INT}}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change RenameColumnEvent{tableId=%s.members, nameMapping={age=precise_age}}, but keeps running in tolerant mode. 
Caused by: UnsupportedSchemaChangeEventException{applyingEvent=RenameColumnEvent{tableId=%s.members, nameMapping={age=precise_age}}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change RenameColumnEvent{tableId=%s.members, nameMapping={gender=biological_sex}}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=RenameColumnEvent{tableId=%s.members, nameMapping={gender=biological_sex}}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change TruncateTableEvent{tableId=%s.members}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=TruncateTableEvent{tableId=%s.members}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change DropTableEvent{tableId=%s.members}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=DropTableEvent{tableId=%s.members}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}")); } @Test @@ -171,8 +181,9 @@ public void testSchemaException() throws Exception { false, false, Collections.emptyList(), - Collections.singletonList( - "java.lang.RuntimeException: Refused to apply schema change event AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} in EXCEPTION mode.")); + Arrays.asList( + "An exception was triggered from Schema change applying task. 
Job will fail now.", + "org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy")); } @Test @@ -216,8 +227,8 @@ public void testFineGrainedSchemaEvolution() throws Exception { "TruncateTableEvent{tableId=%s.members}", "DataChangeEvent{tableId=%s.members, before=[], after=[1014, Gem, 17.0, null], op=INSERT, meta=()}"), Arrays.asList( - "Ignored schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]} to table %s.members.", - "Ignored schema change DropTableEvent{tableId=%s.members} to table %s.members.")); + "Ignored schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}.", + "Ignored schema change DropTableEvent{tableId=%s.members}.")); } @Test diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolvingTransformE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolvingTransformE2eITCase.java index 4f40d897293..1e7d7d3816d 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolvingTransformE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/SchemaEvolvingTransformE2eITCase.java @@ -113,10 +113,13 @@ public void testSchemaEvolveWithIncompatibleChanges() throws Exception { true, false, false, - Collections.emptyList(), Arrays.asList( - "java.lang.IllegalStateException: Incompatible types found for column `age`: \"INT\" and \"DOUBLE\"", - "org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy")); + "AddColumnEvent{tableId=%s.merged, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=tag}]}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1012 -> Eve, 1012, Eve, 17, 1024144, age < 20, 0], op=INSERT, meta=()}", + "AlterColumnTypeEvent{tableId=%s.merged, typeMapping={age=STRING}, oldTypeMapping={age=INT}}", + "AddColumnEvent{tableId=%s.merged, addedColumns=[ColumnWithPosition{column=`biological_sex` TINYINT, position=AFTER, existedColumnName=gender}]}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1013 -> Fiona, 1013, Fiona, 16.0, 1026169, age < 20, null, null], op=INSERT, meta=()}", + "DataChangeEvent{tableId=%s.merged, before=[], after=[1014 -> Gem, 1014, Gem, 17.0, 1028196, age < 20, null, null], op=INSERT, meta=()}")); } @Test @@ -128,8 +131,9 @@ public void testSchemaEvolveWithException() throws Exception { false, Collections.emptyList(), Arrays.asList( - "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} to table %s.members. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", - "UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "An exception was triggered from Schema change applying task. 
Job will fail now.", + "org.apache.flink.util.FlinkRuntimeException: Failed to apply schema change event.", + "Caused by: org.apache.flink.cdc.common.exceptions.UnsupportedSchemaChangeEventException", "org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy")); } @@ -146,8 +150,12 @@ public void testSchemaTryEvolveWithException() throws Exception { "DataChangeEvent{tableId=%s.members, before=[], after=[1013 -> Fiona, 1013, Fiona, null, 1026169, age < 20], op=INSERT, meta=()}", "DataChangeEvent{tableId=%s.members, before=[], after=[1014 -> Gem, 1014, Gem, null, 1028196, age < 20], op=INSERT, meta=()}"), Arrays.asList( - "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} to table %s.members. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", - "UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}")); + "Failed to apply schema change AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change AlterColumnTypeEvent{tableId=%s.members, typeMapping={age=DOUBLE}, oldTypeMapping={age=INT}}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=AlterColumnTypeEvent{tableId=%s.members, typeMapping={age=DOUBLE}, oldTypeMapping={age=INT}}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change RenameColumnEvent{tableId=%s.members, nameMapping={gender=biological_sex}}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=RenameColumnEvent{tableId=%s.members, nameMapping={gender=biological_sex}}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change TruncateTableEvent{tableId=%s.members}, but keeps running in tolerant mode. 
Caused by: UnsupportedSchemaChangeEventException{applyingEvent=TruncateTableEvent{tableId=%s.members}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}", + "Failed to apply schema change DropTableEvent{tableId=%s.members}, but keeps running in tolerant mode. Caused by: UnsupportedSchemaChangeEventException{applyingEvent=DropTableEvent{tableId=%s.members}, exceptionMessage='Rejected schema change event since error.on.schema.change is enabled.', cause='null'}")); } @Test @@ -172,8 +180,9 @@ public void testSchemaException() throws Exception { false, false, Collections.emptyList(), - Collections.singletonList( - "java.lang.RuntimeException: Refused to apply schema change event AddColumnEvent{tableId=%s.members, addedColumns=[ColumnWithPosition{column=`gender` TINYINT, position=AFTER, existedColumnName=age}]} in EXCEPTION mode.")); + Arrays.asList( + "An exception was triggered from Schema change applying task. Job will fail now.", + "org.apache.flink.util.FlinkRuntimeException: Failed to apply schema change event.")); } @Test @@ -209,8 +218,8 @@ public void testFineGrainedSchemaEvolution() throws Exception { "TruncateTableEvent{tableId=%s.members}", "DataChangeEvent{tableId=%s.members, before=[], after=[1014 -> Gem, 1014, Gem, 17.0, null, 1028196, age < 20], op=INSERT, meta=()}"), Arrays.asList( - "Ignored schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]} to table %s.members.", - "Ignored schema change DropTableEvent{tableId=%s.members} to table %s.members.")); + "Ignored schema change DropColumnEvent{tableId=%s.members, droppedColumnNames=[biological_sex]}.", + "Ignored schema change DropTableEvent{tableId=%s.members}.")); } @Test diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/TransformE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/TransformE2eITCase.java index ea3e2e01713..672f6615192 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/TransformE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/TransformE2eITCase.java @@ -1179,8 +1179,14 @@ private void waitForTemporaryRecords(int expectedRecords, long timeout) throws E boolean extractDataLines(String line) { // In multiple parallelism mode, a prefix with subTaskId (like '1> ') will be appended. // Should trim it before extracting data fields. 
- if (!line.startsWith("DataChangeEvent{", 3)) { - return false; + if (parallelism > 1) { + if (!line.startsWith("DataChangeEvent{", 3)) { + return false; + } + } else { + if (!line.startsWith("DataChangeEvent{")) { + return false; + } } Stream.of("before", "after") .forEach( diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/utils/PipelineTestEnvironment.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/utils/PipelineTestEnvironment.java index a1c3c482b30..eb14a531bf1 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/utils/PipelineTestEnvironment.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/utils/PipelineTestEnvironment.java @@ -68,7 +68,18 @@ public abstract class PipelineTestEnvironment extends TestLogger { @Parameterized.Parameter public String flinkVersion; - public Integer parallelism = 4; + public Integer parallelism = getParallelism(); + + private int getParallelism() { + try { + return Integer.parseInt(System.getProperty("specifiedParallelism")); + } catch (NumberFormatException ex) { + LOG.warn( + "Unable to parse specified parallelism configuration ({} provided). Use 4 by default.", + System.getProperty("specifiedParallelism")); + return 4; + } + } // ------------------------------------------------------------------------------------------ // Flink Variables @@ -230,6 +241,8 @@ public void waitUntilJobRunning(Duration timeout) { JobStatusMessage message = jobStatusMessages.iterator().next(); JobStatus jobStatus = message.getJobState(); if (jobStatus.isTerminalState()) { + System.err.println("JM Log: "); + System.err.println(jobManagerConsumer.toUtf8String()); throw new ValidationException( String.format( "Job has been terminated! JobName: %s, JobID: %s, Status: %s", diff --git a/flink-cdc-migration-tests/flink-cdc-migration-testcases/pom.xml b/flink-cdc-migration-tests/flink-cdc-migration-testcases/pom.xml index 12c468a4b47..db8561f7e85 100644 --- a/flink-cdc-migration-tests/flink-cdc-migration-testcases/pom.xml +++ b/flink-cdc-migration-tests/flink-cdc-migration-testcases/pom.xml @@ -36,25 +36,13 @@ limitations under the License. org.apache.flink - flink-cdc-release-3.0.0 + flink-cdc-release-3.2.0 ${revision} compile org.apache.flink - flink-cdc-release-3.0.1 - ${revision} - compile - - - org.apache.flink - flink-cdc-release-3.1.0 - ${revision} - compile - - - org.apache.flink - flink-cdc-release-3.1.1 + flink-cdc-release-3.2.1 ${revision} compile diff --git a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/MigrationTestBase.java b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/MigrationTestBase.java index e1020442b76..dc7c841a8ed 100644 --- a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/MigrationTestBase.java +++ b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/MigrationTestBase.java @@ -17,7 +17,7 @@ package org.apache.flink.cdc.migration.tests; -import org.junit.Assert; +import org.assertj.core.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,22 +32,16 @@ public class MigrationTestBase { /** Flink CDC versions since 3.0. 
*/ public enum FlinkCdcVersion { - v3_0_0, - v3_0_1, - v3_1_0, - v3_1_1, + v3_2_0, + v3_2_1, SNAPSHOT; public String getShadedClassPrefix() { switch (this) { - case v3_0_0: - return "com.ververica.cdc.v3_0_0"; - case v3_0_1: - return "com.ververica.cdc.v3_0_1"; - case v3_1_0: - return "org.apache.flink.cdc.v3_1_0"; - case v3_1_1: - return "org.apache.flink.cdc.v3_1_1"; + case v3_2_0: + return "org.apache.flink.cdc.v3_2_0"; + case v3_2_1: + return "org.apache.flink.cdc.v3_2_1"; case SNAPSHOT: return "org.apache.flink.cdc.snapshot"; default: @@ -57,12 +51,7 @@ public String getShadedClassPrefix() { } private static final List<FlinkCdcVersion> versions = - Arrays.asList( - FlinkCdcVersion.v3_0_0, - FlinkCdcVersion.v3_0_1, - FlinkCdcVersion.v3_1_0, - FlinkCdcVersion.v3_1_1, - FlinkCdcVersion.SNAPSHOT); + Arrays.asList(FlinkCdcVersion.v3_2_0, FlinkCdcVersion.v3_2_1, FlinkCdcVersion.SNAPSHOT); public static List<FlinkCdcVersion> getAllVersions() { return versions.subList(0, versions.size()); } @@ -111,11 +100,17 @@ protected void testMigrationFromTo( Class<?> toVersionMockClass = getMockClass(toVersion, caseName); Object toVersionMockObject = toVersionMockClass.newInstance(); - Assert.assertTrue( - (boolean) - toVersionMockClass - .getDeclaredMethod( - "deserializeAndCheckObject", int.class, byte[].class) - .invoke(toVersionMockObject, serializerVersion, serializedObject)); + Assertions.assertThat( + (boolean) + toVersionMockClass + .getDeclaredMethod( + "deserializeAndCheckObject", + int.class, + byte[].class) + .invoke( + toVersionMockObject, + serializerVersion, + serializedObject)) + .isTrue(); } } diff --git a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationTest.java b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationTest.java index 11adf6f35e4..a3a9eba8d7e 100644 --- a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationTest.java +++ b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationTest.java @@ -19,20 +19,16 @@ import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import org.junit.Test; - -import static org.apache.flink.cdc.migration.tests.MigrationTestBase.FlinkCdcVersion.v3_1_0; +import org.junit.jupiter.api.Test; /** Migration test cases for {@link SchemaManager}. */ -public class SchemaManagerMigrationTest extends MigrationTestBase { +class SchemaManagerMigrationTest extends MigrationTestBase { public static String mockCaseName = "SchemaManagerMigrationMock"; @Test - public void testMigration() throws Exception { - // It is known that 3.1.0 breaks backwards compatibility. - // No state compatibility is guaranteed.
- for (FlinkCdcVersion version : getAllVersionExcept(v3_1_0)) { + void testMigration() throws Exception { + for (FlinkCdcVersion version : getAllVersions()) { testMigrationFromTo(version, getSnapshotVersion(), mockCaseName); } } diff --git a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationTest.java b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationTest.java index 1e1a4c68e9a..11a11f69f30 100644 --- a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationTest.java +++ b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationTest.java @@ -19,19 +19,15 @@ import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.junit.Test; - -import static org.apache.flink.cdc.migration.tests.MigrationTestBase.FlinkCdcVersion.v3_1_0; +import org.junit.jupiter.api.Test; /** Migration test cases for {@link SchemaRegistry}. */ -public class SchemaRegistryMigrationTest extends MigrationTestBase { - public static String mockCaseName = "SchemaRegistryMigrationMock"; +class SchemaRegistryMigrationTest extends MigrationTestBase { + static String mockCaseName = "SchemaRegistryMigrationMock"; @Test - public void testMigration() throws Exception { - // It is known that 3.1.0 that breaks backwards compatibility. - // No state compatibility is guaranteed. - for (FlinkCdcVersion version : getAllVersionExcept(v3_1_0)) { + void testMigration() throws Exception { + for (FlinkCdcVersion version : getAllVersions()) { testMigrationFromTo(version, getSnapshotVersion(), mockCaseName); } } diff --git a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationTest.java b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationTest.java index 3af59245c2f..a2540db288d 100644 --- a/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationTest.java +++ b/flink-cdc-migration-tests/flink-cdc-migration-testcases/src/test/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationTest.java @@ -17,24 +17,16 @@ package org.apache.flink.cdc.migration.tests; -import org.apache.flink.cdc.runtime.operators.transform.TableChangeInfo; +import org.junit.jupiter.api.Test; -import org.junit.Test; +/** Migration test cases for {@code PreTransformChangeInfo}. */ +class TableChangeInfoMigrationTest extends MigrationTestBase { -import static org.apache.flink.cdc.migration.tests.MigrationTestBase.FlinkCdcVersion.v3_0_0; -import static org.apache.flink.cdc.migration.tests.MigrationTestBase.FlinkCdcVersion.v3_0_1; -import static org.apache.flink.cdc.migration.tests.MigrationTestBase.FlinkCdcVersion.v3_1_0; - -/** Migration test cases for {@link TableChangeInfo}. */ -public class TableChangeInfoMigrationTest extends MigrationTestBase { - - public static String mockCaseName = "TableChangeInfoMigrationMock"; + static String mockCaseName = "TableChangeInfoMigrationMock"; @Test - public void testMigration() throws Exception { - // Transform feature does not present until 3.1.0, and - // CDC 3.1.0 breaks backwards compatibility. 
- for (FlinkCdcVersion version : getAllVersionExcept(v3_0_0, v3_0_1, v3_1_0)) { + void testMigration() throws Exception { + for (FlinkCdcVersion version : getAllVersions()) { testMigrationFromTo(version, getSnapshotVersion(), mockCaseName); } } diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/pom.xml b/flink-cdc-migration-tests/flink-cdc-release-3.0.0/pom.xml deleted file mode 100644 index 7207e59967c..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/pom.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - 4.0.0 - - org.apache.flink - flink-cdc-migration-tests - ${revision} - - - flink-cdc-release-3.0.0 - flink-cdc-release-3.0.0 - - - - com.ververica - flink-cdc-base - 3.0.0 - - - com.ververica - flink-cdc-common - 3.0.0 - - - com.ververica - flink-cdc-runtime - 3.0.0 - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.4 - - - shade-flink-cdc - package - - shade - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - com.ververica.cdc - com.ververica.cdc.v3_0_0 - - - - - - - - - - \ No newline at end of file diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java deleted file mode 100644 index 2dd92446e45..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.ververica.cdc.migration.tests; - -/** Base classes for migration test cases. */ -public interface MigrationMockBase { - int getSerializerVersion(); - - byte[] serializeObject() throws Exception; - - boolean deserializeAndCheckObject(int v, byte[] b) throws Exception; -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java deleted file mode 100644 index 44477b5874d..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.ververica.cdc.migration.tests; - -import com.ververica.cdc.common.event.TableId; -import com.ververica.cdc.common.schema.Schema; -import com.ververica.cdc.common.types.DataTypes; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaManager; - -import java.util.Collections; -import java.util.SortedMap; -import java.util.TreeMap; - -/** Dummy classes for migration test. Called via reflection. */ -public class SchemaManagerMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummyObject() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - @Override - public int getSerializerVersion() { - return SchemaManager.SERIALIZER.getVersion(); - } - - @Override - public byte[] serializeObject() throws Exception { - return SchemaManager.SERIALIZER.serialize(generateDummyObject()); - } - - @Override - public boolean deserializeAndCheckObject(int version, byte[] serialized) throws Exception { - Object expected = generateDummyObject(); - Object actual = SchemaManager.SERIALIZER.deserialize(version, serialized); - return expected.equals(actual); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java deleted file mode 100644 index 3322fb0be5f..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.0/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.ververica.cdc.migration.tests; - -import com.ververica.cdc.common.event.TableId; -import com.ververica.cdc.common.schema.Schema; -import com.ververica.cdc.common.types.DataTypes; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaManager; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaRegistry; - -import java.lang.reflect.Field; -import java.util.Collections; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; - -/** Dummy classes for migration test. Called via reflection. */ -public class SchemaRegistryMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummySchemaManager() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - public SchemaRegistry generateSchemaRegistry() { - return new SchemaRegistry("Dummy Name", null, e -> {}); - } - - private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - return (SchemaManager) field.get(schemaRegistry); - } - - private void setSchemaManager(SchemaRegistry schemaRegistry, SchemaManager schemaManager) - throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - field.set(schemaRegistry, schemaManager); - } - - @Override - public int getSerializerVersion() { - return -1; - } - - @Override - public byte[] serializeObject() throws Exception { - CompletableFuture future = new CompletableFuture<>(); - SchemaRegistry registry = generateSchemaRegistry(); - - setSchemaManager(registry, generateDummySchemaManager()); - registry.checkpointCoordinator(0, future); - - while (!future.isDone()) { - Thread.sleep(1000); - } - return future.get(); - } - - @Override - public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { - SchemaRegistry expected = generateSchemaRegistry(); - SchemaRegistry actual = generateSchemaRegistry(); - actual.resetToCheckpoint(0, b); - return getSchemaManager(expected).equals(getSchemaManager(actual)); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/pom.xml b/flink-cdc-migration-tests/flink-cdc-release-3.0.1/pom.xml deleted file mode 100644 index bdb5ecfe949..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/pom.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - 4.0.0 - - org.apache.flink - flink-cdc-migration-tests - ${revision} - - - flink-cdc-release-3.0.1 - flink-cdc-release-3.0.1 - - - - com.ververica - flink-cdc-base - 3.0.1 - - - com.ververica - flink-cdc-common - 3.0.1 - - - com.ververica - flink-cdc-runtime - 3.0.1 - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.4 - - - shade-flink-cdc - package - - shade - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - com.ververica.cdc - com.ververica.cdc.v3_0_1 - - - - - - - - - - \ No newline at end of file diff --git 
a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java deleted file mode 100644 index 2dd92446e45..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/MigrationMockBase.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.ververica.cdc.migration.tests; - -/** Base classes for migration test cases. */ -public interface MigrationMockBase { - int getSerializerVersion(); - - byte[] serializeObject() throws Exception; - - boolean deserializeAndCheckObject(int v, byte[] b) throws Exception; -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java deleted file mode 100644 index 44477b5874d..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaManagerMigrationMock.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.ververica.cdc.migration.tests; - -import com.ververica.cdc.common.event.TableId; -import com.ververica.cdc.common.schema.Schema; -import com.ververica.cdc.common.types.DataTypes; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaManager; - -import java.util.Collections; -import java.util.SortedMap; -import java.util.TreeMap; - -/** Dummy classes for migration test. Called via reflection. 
*/ -public class SchemaManagerMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummyObject() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - @Override - public int getSerializerVersion() { - return SchemaManager.SERIALIZER.getVersion(); - } - - @Override - public byte[] serializeObject() throws Exception { - return SchemaManager.SERIALIZER.serialize(generateDummyObject()); - } - - @Override - public boolean deserializeAndCheckObject(int version, byte[] serialized) throws Exception { - Object expected = generateDummyObject(); - Object actual = SchemaManager.SERIALIZER.deserialize(version, serialized); - return expected.equals(actual); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java deleted file mode 100644 index 3322fb0be5f..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.0.1/src/main/java/com/ververica/cdc/migration/tests/SchemaRegistryMigrationMock.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.ververica.cdc.migration.tests; - -import com.ververica.cdc.common.event.TableId; -import com.ververica.cdc.common.schema.Schema; -import com.ververica.cdc.common.types.DataTypes; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaManager; -import com.ververica.cdc.runtime.operators.schema.coordinator.SchemaRegistry; - -import java.lang.reflect.Field; -import java.util.Collections; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; - -/** Dummy classes for migration test. Called via reflection. 
*/ -public class SchemaRegistryMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummySchemaManager() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - public SchemaRegistry generateSchemaRegistry() { - return new SchemaRegistry("Dummy Name", null, e -> {}); - } - - private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - return (SchemaManager) field.get(schemaRegistry); - } - - private void setSchemaManager(SchemaRegistry schemaRegistry, SchemaManager schemaManager) - throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - field.set(schemaRegistry, schemaManager); - } - - @Override - public int getSerializerVersion() { - return -1; - } - - @Override - public byte[] serializeObject() throws Exception { - CompletableFuture future = new CompletableFuture<>(); - SchemaRegistry registry = generateSchemaRegistry(); - - setSchemaManager(registry, generateDummySchemaManager()); - registry.checkpointCoordinator(0, future); - - while (!future.isDone()) { - Thread.sleep(1000); - } - return future.get(); - } - - @Override - public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { - SchemaRegistry expected = generateSchemaRegistry(); - SchemaRegistry actual = generateSchemaRegistry(); - actual.resetToCheckpoint(0, b); - return getSchemaManager(expected).equals(getSchemaManager(actual)); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java deleted file mode 100644 index 93269abece8..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.migration.tests; - -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.schema.Selectors; -import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaDerivation; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; - -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; - -/** Dummy classes for migration test. Called via reflection. */ -public class SchemaRegistryMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummySchemaManager() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - public SchemaRegistry generateSchemaRegistry() { - return new SchemaRegistry("Dummy Name", null, e -> {}, new ArrayList<>()); - } - - private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - return (SchemaManager) field.get(schemaRegistry); - } - - private void setSchemaManager(SchemaRegistry schemaRegistry, SchemaManager schemaManager) - throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - field.set(schemaRegistry, schemaManager); - } - - private SchemaDerivation getSchemaDerivation(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaDerivation"); - field.setAccessible(true); - return (SchemaDerivation) field.get(schemaRegistry); - } - - private List> getSchemaRoutes(SchemaRegistry schemaRegistry) - throws Exception { - SchemaDerivation schemaDerivation = getSchemaDerivation(schemaRegistry); - Field field = SchemaDerivation.class.getDeclaredField("routes"); - field.setAccessible(true); - return (List>) field.get(schemaDerivation); - } - - @Override - public int getSerializerVersion() { - return -1; - } - - @Override - public byte[] serializeObject() throws Exception { - CompletableFuture future = new CompletableFuture<>(); - SchemaRegistry registry = generateSchemaRegistry(); - setSchemaManager(registry, generateDummySchemaManager()); - - registry.checkpointCoordinator(0, future); - - while (!future.isDone()) { - Thread.sleep(1000); - } - return future.get(); - } - - @Override - public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { - SchemaRegistry expected = generateSchemaRegistry(); - setSchemaManager(expected, generateDummySchemaManager()); - SchemaRegistry actual = generateSchemaRegistry(); - actual.resetToCheckpoint(0, b); - return 
getSchemaManager(expected).equals(getSchemaManager(actual)) - && getSchemaRoutes(expected).equals(getSchemaRoutes(actual)); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java deleted file mode 100644 index 93269abece8..00000000000 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.migration.tests; - -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.schema.Selectors; -import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaDerivation; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; - -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; - -/** Dummy classes for migration test. Called via reflection. 
*/ -public class SchemaRegistryMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); - - public SchemaManager generateDummySchemaManager() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); - } - - public SchemaRegistry generateSchemaRegistry() { - return new SchemaRegistry("Dummy Name", null, e -> {}, new ArrayList<>()); - } - - private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - return (SchemaManager) field.get(schemaRegistry); - } - - private void setSchemaManager(SchemaRegistry schemaRegistry, SchemaManager schemaManager) - throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - field.set(schemaRegistry, schemaManager); - } - - private SchemaDerivation getSchemaDerivation(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaDerivation"); - field.setAccessible(true); - return (SchemaDerivation) field.get(schemaRegistry); - } - - private List> getSchemaRoutes(SchemaRegistry schemaRegistry) - throws Exception { - SchemaDerivation schemaDerivation = getSchemaDerivation(schemaRegistry); - Field field = SchemaDerivation.class.getDeclaredField("routes"); - field.setAccessible(true); - return (List>) field.get(schemaDerivation); - } - - @Override - public int getSerializerVersion() { - return -1; - } - - @Override - public byte[] serializeObject() throws Exception { - CompletableFuture future = new CompletableFuture<>(); - SchemaRegistry registry = generateSchemaRegistry(); - setSchemaManager(registry, generateDummySchemaManager()); - - registry.checkpointCoordinator(0, future); - - while (!future.isDone()) { - Thread.sleep(1000); - } - return future.get(); - } - - @Override - public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { - SchemaRegistry expected = generateSchemaRegistry(); - setSchemaManager(expected, generateDummySchemaManager()); - SchemaRegistry actual = generateSchemaRegistry(); - actual.resetToCheckpoint(0, b); - return getSchemaManager(expected).equals(getSchemaManager(actual)) - && getSchemaRoutes(expected).equals(getSchemaRoutes(actual)); - } -} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/pom.xml b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/pom.xml similarity index 93% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.0/pom.xml rename to flink-cdc-migration-tests/flink-cdc-release-3.2.0/pom.xml index 3d6cd439421..5fe79d90aae 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/pom.xml +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/pom.xml @@ -25,24 +25,24 @@ limitations under the License. 
${revision} - flink-cdc-release-3.1.0 - flink-cdc-release-3.1.0 + flink-cdc-release-3.2.0 + flink-cdc-release-3.2.0 org.apache.flink flink-cdc-base - 3.1.0 + 3.2.0 org.apache.flink flink-cdc-common - 3.1.0 + 3.2.0 org.apache.flink flink-cdc-runtime - 3.1.0 + 3.2.0 @@ -73,7 +73,7 @@ limitations under the License. org.apache.flink.cdc - org.apache.flink.cdc.v3_1_0 + org.apache.flink.cdc.v3_2_0 META-INF/*.SF,META-INF/*.DSA,META-INF/*.RSA @@ -83,5 +83,4 @@ limitations under the License. - \ No newline at end of file diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java similarity index 100% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java similarity index 54% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java index c4f0788dd0d..87f7d03ad4c 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java @@ -18,35 +18,58 @@ package org.apache.flink.cdc.migration.tests; import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; /** Dummy classes for migration test. Called via reflection. 
*/ public class SchemaManagerMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); private static final String SCHEMA_MANAGER = "runtime.operators.schema.coordinator.SchemaManager"; + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map<TableId, SortedMap<Integer, Schema>> ORIGINAL_SCHEMA_MAP; + private static final Map<TableId, SortedMap<Integer, Schema>> EVOLVED_SCHEMA_MAP; + + static { + SortedMap<Integer, Schema> originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap<Integer, Schema> evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } + public SchemaManager generateDummyObject() { - SortedMap<Integer, Schema> schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); + return new SchemaManager( + ORIGINAL_SCHEMA_MAP, EVOLVED_SCHEMA_MAP, SchemaChangeBehavior.TRY_EVOLVE); } @Override diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java new file mode 100644 index 00000000000..41f0d8ac0bd --- /dev/null +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.migration.tests; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.schema.Selectors; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaDerivation; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; + +/** Dummy classes for migration test. Called via reflection. */ +public class SchemaRegistryMigrationMock implements MigrationMockBase { + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); + + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map<TableId, SortedMap<Integer, Schema>> ORIGINAL_SCHEMA_MAP; + private static final Map<TableId, SortedMap<Integer, Schema>> EVOLVED_SCHEMA_MAP; + + static { + SortedMap<Integer, Schema> originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap<Integer, Schema> evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } + + public SchemaManager generateDummySchemaManager() { + return new SchemaManager(new HashMap<>(), new HashMap<>(), SchemaChangeBehavior.EVOLVE); + } + + public SchemaRegistry generateSchemaRegistry() { + return new SchemaRegistry( + "Dummy Name", + null, + Executors.newSingleThreadExecutor(), + e -> {}, + new ArrayList<>()); + } + + private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { + Field managerField = SchemaRegistry.class.getDeclaredField("schemaManager"); + managerField.setAccessible(true); + return (SchemaManager) managerField.get(schemaRegistry); + } + + @SuppressWarnings("unchecked") + private Map<TableId, SortedMap<Integer, Schema>> getOriginalSchemaMap( + SchemaRegistry schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("originalSchemas"); + originalField.setAccessible(true); + return (Map<TableId, SortedMap<Integer, Schema>>) originalField.get(schemaManager); + } + + private void setOriginalSchemaMap( + SchemaRegistry schemaRegistry, + Map<TableId, SortedMap<Integer, Schema>> originalSchemaMap) + throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("originalSchemas"); + field.setAccessible(true); + field.set(schemaManager, originalSchemaMap); + } + + 
@SuppressWarnings("unchecked") + private Map<TableId, SortedMap<Integer, Schema>> getEvolvedSchemaMap( + SchemaRegistry schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("evolvedSchemas"); + originalField.setAccessible(true); + return (Map<TableId, SortedMap<Integer, Schema>>) originalField.get(schemaManager); + } + + private void setEvolvedSchemaMap( + SchemaRegistry schemaRegistry, + Map<TableId, SortedMap<Integer, Schema>> evolvedSchemaMap) + throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("evolvedSchemas"); + field.setAccessible(true); + field.set(schemaManager, evolvedSchemaMap); + } + + private SchemaDerivation getSchemaDerivation(SchemaRegistry schemaRegistry) throws Exception { + Field field = SchemaRegistry.class.getDeclaredField("schemaDerivation"); + field.setAccessible(true); + return (SchemaDerivation) field.get(schemaRegistry); + } + + @SuppressWarnings("unchecked") + private List<Tuple2<Selectors, TableId>> getSchemaRoutes(SchemaRegistry schemaRegistry) + throws Exception { + SchemaDerivation schemaDerivation = getSchemaDerivation(schemaRegistry); + Field field = SchemaDerivation.class.getDeclaredField("routes"); + field.setAccessible(true); + return (List<Tuple2<Selectors, TableId>>) field.get(schemaDerivation); + } + + @Override + public int getSerializerVersion() { + return -1; + } + + @Override + public byte[] serializeObject() throws Exception { + CompletableFuture<byte[]> future = new CompletableFuture<>(); + SchemaRegistry registry = generateSchemaRegistry(); + setOriginalSchemaMap(registry, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(registry, EVOLVED_SCHEMA_MAP); + + registry.checkpointCoordinator(0, future); + + while (!future.isDone()) { + Thread.sleep(1000); + } + return future.get(); + } + + @Override + public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { + SchemaRegistry expected = generateSchemaRegistry(); + setOriginalSchemaMap(expected, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(expected, EVOLVED_SCHEMA_MAP); + + SchemaRegistry actual = generateSchemaRegistry(); + actual.resetToCheckpoint(0, b); + + return getOriginalSchemaMap(expected).equals(getOriginalSchemaMap(actual)) + && getEvolvedSchemaMap(expected).equals(getEvolvedSchemaMap(actual)) + && getSchemaRoutes(expected).equals(getSchemaRoutes(actual)); + } +} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java similarity index 72% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java index 6a14a2be2a5..1317c73ebf4 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java @@ -20,7 +20,7 @@ import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.transform.TableChangeInfo; +import 
org.apache.flink.cdc.runtime.operators.transform.PreTransformChangeInfo; /** Dummy classes for migration test. Called via reflection. */ public class TableChangeInfoMigrationMock implements MigrationMockBase { @@ -34,27 +34,28 @@ public class TableChangeInfoMigrationMock implements MigrationMockBase { .primaryKey("id", "name") .build(); - public TableChangeInfo generateDummyObject() { - return TableChangeInfo.of(DUMMY_TABLE_ID, DUMMY_SCHEMA, DUMMY_SCHEMA); + public PreTransformChangeInfo generateDummyObject() { + return PreTransformChangeInfo.of(DUMMY_TABLE_ID, DUMMY_SCHEMA, DUMMY_SCHEMA); } @Override public int getSerializerVersion() { - return TableChangeInfo.SERIALIZER.getVersion(); + return PreTransformChangeInfo.SERIALIZER.getVersion(); } @Override public byte[] serializeObject() throws Exception { - return TableChangeInfo.SERIALIZER.serialize(generateDummyObject()); + return PreTransformChangeInfo.SERIALIZER.serialize(generateDummyObject()); } @Override public boolean deserializeAndCheckObject(int version, byte[] bytes) throws Exception { - TableChangeInfo expected = generateDummyObject(); - TableChangeInfo actual = TableChangeInfo.SERIALIZER.deserialize(version, bytes); + PreTransformChangeInfo expected = generateDummyObject(); + PreTransformChangeInfo actual = + PreTransformChangeInfo.SERIALIZER.deserialize(version, bytes); return expected.getTableId().equals(actual.getTableId()) - && expected.getOriginalSchema().equals(actual.getOriginalSchema()) - && expected.getTransformedSchema().equals(actual.getTransformedSchema()); + && expected.getSourceSchema().equals(actual.getSourceSchema()) + && expected.getPreTransformedSchema().equals(actual.getPreTransformedSchema()); } } diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/pom.xml b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/pom.xml similarity index 93% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.1/pom.xml rename to flink-cdc-migration-tests/flink-cdc-release-3.2.1/pom.xml index 751b0f9d877..6cace468bdf 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/pom.xml +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/pom.xml @@ -25,24 +25,24 @@ limitations under the License. ${revision} - flink-cdc-release-3.1.1 - flink-cdc-release-3.1.1 + flink-cdc-release-3.2.1 + flink-cdc-release-3.2.1 org.apache.flink flink-cdc-base - 3.1.1 + 3.2.1 org.apache.flink flink-cdc-common - 3.1.1 + 3.2.1 org.apache.flink flink-cdc-runtime - 3.1.1 + 3.2.1 @@ -73,7 +73,7 @@ limitations under the License. org.apache.flink.cdc - org.apache.flink.cdc.v3_1_1 + org.apache.flink.cdc.v3_2_1 META-INF/*.SF,META-INF/*.DSA,META-INF/*.RSA @@ -83,5 +83,4 @@ limitations under the License. 
- \ No newline at end of file diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java similarity index 100% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/MigrationMockBase.java diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java similarity index 54% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java index c4f0788dd0d..87f7d03ad4c 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java @@ -18,35 +18,58 @@ package org.apache.flink.cdc.migration.tests; import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; /** Dummy classes for migration test. Called via reflection. 
*/ public class SchemaManagerMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); private static final String SCHEMA_MANAGER = "runtime.operators.schema.coordinator.SchemaManager"; + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map> ORIGINAL_SCHEMA_MAP; + private static final Map> EVOLVED_SCHEMA_MAP; + + static { + SortedMap originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } + public SchemaManager generateDummyObject() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager(Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions)); + return new SchemaManager( + ORIGINAL_SCHEMA_MAP, EVOLVED_SCHEMA_MAP, SchemaChangeBehavior.TRY_EVOLVE); } @Override diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java new file mode 100644 index 00000000000..41f0d8ac0bd --- /dev/null +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.migration.tests; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.schema.Selectors; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaDerivation; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; + +/** Dummy classes for migration test. Called via reflection. */ +public class SchemaRegistryMigrationMock implements MigrationMockBase { + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); + + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map> ORIGINAL_SCHEMA_MAP; + private static final Map> EVOLVED_SCHEMA_MAP; + + static { + SortedMap originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } + + public SchemaManager generateDummySchemaManager() { + return new SchemaManager(new HashMap<>(), new HashMap<>(), SchemaChangeBehavior.EVOLVE); + } + + public SchemaRegistry generateSchemaRegistry() { + return new SchemaRegistry( + "Dummy Name", + null, + Executors.newSingleThreadExecutor(), + e -> {}, + new ArrayList<>()); + } + + private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { + Field managerField = SchemaRegistry.class.getDeclaredField("schemaManager"); + managerField.setAccessible(true); + return (SchemaManager) managerField.get(schemaRegistry); + } + + @SuppressWarnings("unchecked") + private Map> getOriginalSchemaMap( + SchemaRegistry schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("originalSchemas"); + originalField.setAccessible(true); + return (Map>) originalField.get(schemaManager); + } + + private void setOriginalSchemaMap( + SchemaRegistry schemaRegistry, + Map> originalSchemaMap) + throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("originalSchemas"); + field.setAccessible(true); + field.set(schemaManager, originalSchemaMap); + } + + 
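The class comment notes these mocks are called via reflection, so a migration test harness can drive the snapshot round-trip without a compile-time dependency on this module. A hedged sketch of such a driver follows; the driver class itself is hypothetical, the mock's method names come from this file, and the fully qualified class name assumes no shade relocation has been applied.

    // Hypothetical driver, assuming the mock class is visible on the classpath under this name.
    public class SchemaRegistryMigrationDriver {
        public static void main(String[] args) throws Exception {
            Class<?> mockClass =
                    Class.forName("org.apache.flink.cdc.migration.tests.SchemaRegistryMigrationMock");
            Object mock = mockClass.getDeclaredConstructor().newInstance();

            // Take a coordinator state snapshot with one mock version...
            int version = (int) mockClass.getMethod("getSerializerVersion").invoke(mock);
            byte[] snapshot = (byte[]) mockClass.getMethod("serializeObject").invoke(mock);

            // ...then verify the state can be restored and compared field by field.
            boolean compatible =
                    (boolean)
                            mockClass
                                    .getMethod("deserializeAndCheckObject", int.class, byte[].class)
                                    .invoke(mock, version, snapshot);
            System.out.println("Checkpoint state round-trip compatible: " + compatible);
        }
    }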
@SuppressWarnings("unchecked") + private Map> getEvolvedSchemaMap( + SchemaRegistry schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("evolvedSchemas"); + originalField.setAccessible(true); + return (Map>) originalField.get(schemaManager); + } + + private void setEvolvedSchemaMap( + SchemaRegistry schemaRegistry, + Map> evolvedSchemaMap) + throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("evolvedSchemas"); + field.setAccessible(true); + field.set(schemaManager, evolvedSchemaMap); + } + + private SchemaDerivation getSchemaDerivation(SchemaRegistry schemaRegistry) throws Exception { + Field field = SchemaRegistry.class.getDeclaredField("schemaDerivation"); + field.setAccessible(true); + return (SchemaDerivation) field.get(schemaRegistry); + } + + @SuppressWarnings("unchecked") + private List> getSchemaRoutes(SchemaRegistry schemaRegistry) + throws Exception { + SchemaDerivation schemaDerivation = getSchemaDerivation(schemaRegistry); + Field field = SchemaDerivation.class.getDeclaredField("routes"); + field.setAccessible(true); + return (List>) field.get(schemaDerivation); + } + + @Override + public int getSerializerVersion() { + return -1; + } + + @Override + public byte[] serializeObject() throws Exception { + CompletableFuture future = new CompletableFuture<>(); + SchemaRegistry registry = generateSchemaRegistry(); + setOriginalSchemaMap(registry, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(registry, EVOLVED_SCHEMA_MAP); + + registry.checkpointCoordinator(0, future); + + while (!future.isDone()) { + Thread.sleep(1000); + } + return future.get(); + } + + @Override + public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { + SchemaRegistry expected = generateSchemaRegistry(); + setOriginalSchemaMap(expected, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(expected, EVOLVED_SCHEMA_MAP); + + SchemaRegistry actual = generateSchemaRegistry(); + actual.resetToCheckpoint(0, b); + + return getOriginalSchemaMap(expected).equals(getOriginalSchemaMap(actual)) + && getEvolvedSchemaMap(expected).equals(getEvolvedSchemaMap(actual)) + && getSchemaRoutes(expected).equals(getSchemaRoutes(actual)); + } +} diff --git a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java similarity index 72% rename from flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java rename to flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java index 6a14a2be2a5..1317c73ebf4 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-3.1.0/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-3.2.1/src/main/java/org/apache/flink/cdc/migration/tests/TableChangeInfoMigrationMock.java @@ -20,7 +20,7 @@ import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.transform.TableChangeInfo; +import 
org.apache.flink.cdc.runtime.operators.transform.PreTransformChangeInfo; /** Dummy classes for migration test. Called via reflection. */ public class TableChangeInfoMigrationMock implements MigrationMockBase { @@ -34,27 +34,28 @@ public class TableChangeInfoMigrationMock implements MigrationMockBase { .primaryKey("id", "name") .build(); - public TableChangeInfo generateDummyObject() { - return TableChangeInfo.of(DUMMY_TABLE_ID, DUMMY_SCHEMA, DUMMY_SCHEMA); + public PreTransformChangeInfo generateDummyObject() { + return PreTransformChangeInfo.of(DUMMY_TABLE_ID, DUMMY_SCHEMA, DUMMY_SCHEMA); } @Override public int getSerializerVersion() { - return TableChangeInfo.SERIALIZER.getVersion(); + return PreTransformChangeInfo.SERIALIZER.getVersion(); } @Override public byte[] serializeObject() throws Exception { - return TableChangeInfo.SERIALIZER.serialize(generateDummyObject()); + return PreTransformChangeInfo.SERIALIZER.serialize(generateDummyObject()); } @Override public boolean deserializeAndCheckObject(int version, byte[] bytes) throws Exception { - TableChangeInfo expected = generateDummyObject(); - TableChangeInfo actual = TableChangeInfo.SERIALIZER.deserialize(version, bytes); + PreTransformChangeInfo expected = generateDummyObject(); + PreTransformChangeInfo actual = + PreTransformChangeInfo.SERIALIZER.deserialize(version, bytes); return expected.getTableId().equals(actual.getTableId()) - && expected.getOriginalSchema().equals(actual.getOriginalSchema()) - && expected.getTransformedSchema().equals(actual.getTransformedSchema()); + && expected.getSourceSchema().equals(actual.getSourceSchema()) + && expected.getPreTransformedSchema().equals(actual.getPreTransformedSchema()); } } diff --git a/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java index 7774604d7ed..16d8ad75363 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaManagerMigrationMock.java @@ -21,33 +21,55 @@ import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaManager; -import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; /** Dummy classes for migration test. Called via reflection. 
*/ public class SchemaManagerMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); + + private static final String SCHEMA_MANAGER = + "runtime.operators.schema.coordinator.SchemaManager"; + + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map> ORIGINAL_SCHEMA_MAP; + private static final Map> EVOLVED_SCHEMA_MAP; + + static { + SortedMap originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } public SchemaManager generateDummyObject() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); return new SchemaManager( - Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions), - Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions), - SchemaChangeBehavior.EVOLVE); + ORIGINAL_SCHEMA_MAP, EVOLVED_SCHEMA_MAP, SchemaChangeBehavior.TRY_EVOLVE); } @Override diff --git a/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java b/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java index 34cd1ea0343..b9c8c2d1408 100644 --- a/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java +++ b/flink-cdc-migration-tests/flink-cdc-release-snapshot/src/main/java/org/apache/flink/cdc/migration/tests/SchemaRegistryMigrationMock.java @@ -17,107 +17,131 @@ package org.apache.flink.cdc.migration.tests; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEventType; -import org.apache.flink.cdc.common.event.SchemaChangeEventTypeFamily; import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.schema.Selectors; -import org.apache.flink.cdc.common.sink.MetadataApplier; import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaDerivation; -import 
org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaManager; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaCoordinator; +import org.apache.flink.metrics.groups.OperatorCoordinatorMetricGroup; +import org.apache.flink.runtime.checkpoint.CheckpointCoordinator; +import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.runtime.operators.coordination.CoordinatorStore; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; + +import javax.annotation.Nullable; import java.lang.reflect.Field; +import java.time.Duration; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Set; +import java.util.HashMap; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; -import java.util.stream.Collectors; /** Dummy classes for migration test. Called via reflection. */ public class SchemaRegistryMigrationMock implements MigrationMockBase { - private static final TableId DUMMY_TABLE_ID = - TableId.tableId("dummyNamespace", "dummySchema", "dummyTable"); - private static final Schema DUMMY_SCHEMA = - Schema.newBuilder() - .physicalColumn("id", DataTypes.INT()) - .physicalColumn("name", DataTypes.STRING()) - .physicalColumn("age", DataTypes.DOUBLE()) - .primaryKey("id", "name") - .build(); + private static final TableId TABLE_1 = TableId.tableId("ns", "scm", "tbl1"); + private static final TableId TABLE_2 = TableId.tableId("ns", "scm", "tbl2"); + + private static Schema genSchema(String identifier) { + return Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("col_" + identifier, DataTypes.STRING()) + .primaryKey("id") + .build(); + } + + private static final Map> ORIGINAL_SCHEMA_MAP; + private static final Map> EVOLVED_SCHEMA_MAP; + + static { + SortedMap originalSchemas = new TreeMap<>(); + originalSchemas.put(1, genSchema("upstream_1")); + originalSchemas.put(2, genSchema("upstream_2")); + originalSchemas.put(3, genSchema("upstream_3")); + + SortedMap evolvedSchemas = new TreeMap<>(); + evolvedSchemas.put(1, genSchema("evolved_1")); + evolvedSchemas.put(2, genSchema("evolved_2")); + evolvedSchemas.put(3, genSchema("evolved_3")); + + ORIGINAL_SCHEMA_MAP = new HashMap<>(); + ORIGINAL_SCHEMA_MAP.put(TABLE_1, originalSchemas); + ORIGINAL_SCHEMA_MAP.put(TABLE_2, originalSchemas); + + EVOLVED_SCHEMA_MAP = new HashMap<>(); + EVOLVED_SCHEMA_MAP.put(TABLE_1, evolvedSchemas); + EVOLVED_SCHEMA_MAP.put(TABLE_2, evolvedSchemas); + } public SchemaManager generateDummySchemaManager() { - SortedMap schemaVersions = new TreeMap<>(); - schemaVersions.put(1, DUMMY_SCHEMA); - schemaVersions.put(2, DUMMY_SCHEMA); - schemaVersions.put(3, DUMMY_SCHEMA); - return new SchemaManager( - Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions), - Collections.singletonMap(DUMMY_TABLE_ID, schemaVersions), - SchemaChangeBehavior.EVOLVE); + return new SchemaManager(new HashMap<>(), new HashMap<>(), SchemaChangeBehavior.EVOLVE); } - public SchemaRegistry generateSchemaRegistry() { - return new SchemaRegistry( - "Dummy Name", - null, - Executors.newFixedThreadPool(1), - new MetadataApplier() { - @Override - public boolean acceptsSchemaEvolutionType( - SchemaChangeEventType schemaChangeEventType) { - return 
true; - } - - @Override - public Set getSupportedSchemaEvolutionTypes() { - return Arrays.stream(SchemaChangeEventTypeFamily.ALL) - .collect(Collectors.toSet()); - } - - @Override - public void applySchemaChange(SchemaChangeEvent schemaChangeEvent) { - // Do nothing - } - }, - new ArrayList<>(), - SchemaChangeBehavior.EVOLVE); + public SchemaCoordinator generateSchemaRegistry() { + SchemaCoordinator coordinator = + new SchemaCoordinator( + "Dummy Name", + MOCKED_CONTEXT, + Executors.newSingleThreadExecutor(), + e -> {}, + new ArrayList<>(), + SchemaChangeBehavior.EVOLVE, + Duration.ofMinutes(3)); + try { + coordinator.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + return coordinator; } - private SchemaManager getSchemaManager(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); - field.setAccessible(true); - return (SchemaManager) field.get(schemaRegistry); + private SchemaManager getSchemaManager(SchemaCoordinator schemaCoordinator) throws Exception { + Field managerField = + SchemaCoordinator.class.getSuperclass().getDeclaredField("schemaManager"); + managerField.setAccessible(true); + return (SchemaManager) managerField.get(schemaCoordinator); + } + + @SuppressWarnings("unchecked") + private Map> getOriginalSchemaMap( + SchemaCoordinator schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("originalSchemas"); + originalField.setAccessible(true); + return (Map>) originalField.get(schemaManager); } - private void setSchemaManager(SchemaRegistry schemaRegistry, SchemaManager schemaManager) + private void setOriginalSchemaMap( + SchemaCoordinator schemaRegistry, + Map> originalSchemaMap) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaManager"); + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("originalSchemas"); field.setAccessible(true); - field.set(schemaRegistry, schemaManager); + field.set(schemaManager, originalSchemaMap); } - private SchemaDerivation getSchemaDerivation(SchemaRegistry schemaRegistry) throws Exception { - Field field = SchemaRegistry.class.getDeclaredField("schemaDerivation"); - field.setAccessible(true); - return (SchemaDerivation) field.get(schemaRegistry); + @SuppressWarnings("unchecked") + private Map> getEvolvedSchemaMap( + SchemaCoordinator schemaRegistry) throws Exception { + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field originalField = SchemaManager.class.getDeclaredField("evolvedSchemas"); + originalField.setAccessible(true); + return (Map>) originalField.get(schemaManager); } - private List> getSchemaRoutes(SchemaRegistry schemaRegistry) + private void setEvolvedSchemaMap( + SchemaCoordinator schemaRegistry, + Map> evolvedSchemaMap) throws Exception { - SchemaDerivation schemaDerivation = getSchemaDerivation(schemaRegistry); - Field field = SchemaDerivation.class.getDeclaredField("routes"); + SchemaManager schemaManager = getSchemaManager(schemaRegistry); + Field field = SchemaManager.class.getDeclaredField("evolvedSchemas"); field.setAccessible(true); - return (List>) field.get(schemaDerivation); + field.set(schemaManager, evolvedSchemaMap); } @Override @@ -128,8 +152,9 @@ public int getSerializerVersion() { @Override public byte[] serializeObject() throws Exception { CompletableFuture future = new CompletableFuture<>(); - 
SchemaRegistry registry = generateSchemaRegistry(); - setSchemaManager(registry, generateDummySchemaManager()); + SchemaCoordinator registry = generateSchemaRegistry(); + setOriginalSchemaMap(registry, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(registry, EVOLVED_SCHEMA_MAP); registry.checkpointCoordinator(0, future); @@ -141,11 +166,57 @@ public byte[] serializeObject() throws Exception { @Override public boolean deserializeAndCheckObject(int v, byte[] b) throws Exception { - SchemaRegistry expected = generateSchemaRegistry(); - setSchemaManager(expected, generateDummySchemaManager()); - SchemaRegistry actual = generateSchemaRegistry(); + SchemaCoordinator expected = generateSchemaRegistry(); + setOriginalSchemaMap(expected, ORIGINAL_SCHEMA_MAP); + setEvolvedSchemaMap(expected, EVOLVED_SCHEMA_MAP); + + SchemaCoordinator actual = generateSchemaRegistry(); actual.resetToCheckpoint(0, b); - return getSchemaManager(expected).equals(getSchemaManager(actual)) - && getSchemaRoutes(expected).equals(getSchemaRoutes(actual)); + + return getOriginalSchemaMap(expected).equals(getOriginalSchemaMap(actual)) + && getEvolvedSchemaMap(expected).equals(getEvolvedSchemaMap(actual)); } + + private static final OperatorCoordinator.Context MOCKED_CONTEXT = + new OperatorCoordinator.Context() { + + @Override + public OperatorID getOperatorId() { + return null; + } + + @Override + public OperatorCoordinatorMetricGroup metricGroup() { + return null; + } + + @Override + public void failJob(Throwable throwable) {} + + @Override + public int currentParallelism() { + return 0; + } + + @Override + public ClassLoader getUserCodeClassloader() { + return null; + } + + @Override + public CoordinatorStore getCoordinatorStore() { + return null; + } + + @Override + public boolean isConcurrentExecutionAttemptsSupported() { + return false; + } + + @Nullable + @Override + public CheckpointCoordinator getCheckpointCoordinator() { + return null; + } + }; } diff --git a/flink-cdc-migration-tests/pom.xml b/flink-cdc-migration-tests/pom.xml index 3033ef712db..8720b1da1f8 100644 --- a/flink-cdc-migration-tests/pom.xml +++ b/flink-cdc-migration-tests/pom.xml @@ -30,10 +30,8 @@ limitations under the License. pom - flink-cdc-release-3.0.0 - flink-cdc-release-3.0.1 - flink-cdc-release-3.1.0 - flink-cdc-release-3.1.1 + flink-cdc-release-3.2.0 + flink-cdc-release-3.2.1 flink-cdc-release-snapshot flink-cdc-migration-testcases diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperator.java deleted file mode 100644 index 179a71e2dfa..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperator.java +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.runtime.operators.schema; - -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.cdc.common.annotation.Internal; -import org.apache.flink.cdc.common.annotation.VisibleForTesting; -import org.apache.flink.cdc.common.data.DecimalData; -import org.apache.flink.cdc.common.data.LocalZonedTimestampData; -import org.apache.flink.cdc.common.data.RecordData; -import org.apache.flink.cdc.common.data.StringData; -import org.apache.flink.cdc.common.data.TimestampData; -import org.apache.flink.cdc.common.data.ZonedTimestampData; -import org.apache.flink.cdc.common.event.DataChangeEvent; -import org.apache.flink.cdc.common.event.DropTableEvent; -import org.apache.flink.cdc.common.event.Event; -import org.apache.flink.cdc.common.event.FlushEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEventType; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; -import org.apache.flink.cdc.common.route.RouteRule; -import org.apache.flink.cdc.common.schema.Column; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.schema.Selectors; -import org.apache.flink.cdc.common.types.DataType; -import org.apache.flink.cdc.common.types.DataTypeFamily; -import org.apache.flink.cdc.common.types.DataTypeRoot; -import org.apache.flink.cdc.common.types.DecimalType; -import org.apache.flink.cdc.common.utils.ChangeEventUtils; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.cdc.runtime.operators.schema.event.CoordinationResponseUtils; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeProcessingResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResultRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResultResponse; -import org.apache.flink.cdc.runtime.operators.schema.metrics.SchemaOperatorMetrics; -import org.apache.flink.cdc.runtime.operators.sink.SchemaEvolutionClient; -import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; -import org.apache.flink.cdc.runtime.typeutils.NonceUtils; -import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; -import org.apache.flink.runtime.operators.coordination.CoordinationRequest; -import org.apache.flink.runtime.operators.coordination.CoordinationResponse; -import org.apache.flink.runtime.state.StateSnapshotContext; -import org.apache.flink.streaming.api.graph.StreamConfig; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.ChainingStrategy; -import org.apache.flink.streaming.api.operators.OneInputStreamOperator; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.StreamTask; -import org.apache.flink.util.SerializedValue; - -import org.apache.flink.shaded.guava31.com.google.common.cache.CacheBuilder; -import org.apache.flink.shaded.guava31.com.google.common.cache.CacheLoader; -import 
org.apache.flink.shaded.guava31.com.google.common.cache.LoadingCache; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nullable; - -import java.io.Serializable; -import java.math.BigDecimal; -import java.time.Duration; -import java.time.Instant; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; - -import static org.apache.flink.cdc.common.pipeline.PipelineOptions.DEFAULT_SCHEMA_OPERATOR_RPC_TIMEOUT; - -/** - * The operator will evolve schemas in {@link SchemaRegistry} for incoming {@link - * SchemaChangeEvent}s and block the stream for tables before their schema changes finish. - */ -@Internal -public class SchemaOperator extends AbstractStreamOperator - implements OneInputStreamOperator, Serializable { - - private static final long serialVersionUID = 1L; - - private static final Logger LOG = LoggerFactory.getLogger(SchemaOperator.class); - private static final Duration CACHE_EXPIRE_DURATION = Duration.ofDays(1); - - private final List routingRules; - - private final String timezone; - - /** - * Storing route source table selector, sink table name (before symbol replacement), and replace - * symbol in a tuple. - */ - private transient List> routes; - - private transient TaskOperatorEventGateway toCoordinator; - private transient SchemaEvolutionClient schemaEvolutionClient; - private transient LoadingCache originalSchema; - private transient LoadingCache evolvedSchema; - private transient LoadingCache schemaDivergesMap; - - /** - * Storing mapping relations between upstream tableId (source table) mapping to downstream - * tableIds (sink tables). 
- */ - private transient LoadingCache> tableIdMappingCache; - - private final long rpcTimeOutInMillis; - private final SchemaChangeBehavior schemaChangeBehavior; - - private transient SchemaOperatorMetrics schemaOperatorMetrics; - private transient int subTaskId; - - @VisibleForTesting - public SchemaOperator(List routingRules) { - this.routingRules = routingRules; - this.chainingStrategy = ChainingStrategy.ALWAYS; - this.rpcTimeOutInMillis = DEFAULT_SCHEMA_OPERATOR_RPC_TIMEOUT.toMillis(); - this.schemaChangeBehavior = SchemaChangeBehavior.EVOLVE; - this.timezone = "UTC"; - } - - @VisibleForTesting - public SchemaOperator(List routingRules, Duration rpcTimeOut) { - this.routingRules = routingRules; - this.chainingStrategy = ChainingStrategy.ALWAYS; - this.rpcTimeOutInMillis = rpcTimeOut.toMillis(); - this.schemaChangeBehavior = SchemaChangeBehavior.EVOLVE; - this.timezone = "UTC"; - } - - @VisibleForTesting - public SchemaOperator( - List routingRules, - Duration rpcTimeOut, - SchemaChangeBehavior schemaChangeBehavior) { - this.routingRules = routingRules; - this.chainingStrategy = ChainingStrategy.ALWAYS; - this.rpcTimeOutInMillis = rpcTimeOut.toMillis(); - this.schemaChangeBehavior = schemaChangeBehavior; - this.timezone = "UTC"; - } - - public SchemaOperator( - List routingRules, - Duration rpcTimeOut, - SchemaChangeBehavior schemaChangeBehavior, - String timezone) { - this.routingRules = routingRules; - this.chainingStrategy = ChainingStrategy.ALWAYS; - this.rpcTimeOutInMillis = rpcTimeOut.toMillis(); - this.schemaChangeBehavior = schemaChangeBehavior; - this.timezone = timezone; - } - - @Override - public void open() throws Exception { - super.open(); - schemaOperatorMetrics = - new SchemaOperatorMetrics( - getRuntimeContext().getMetricGroup(), schemaChangeBehavior); - subTaskId = getRuntimeContext().getIndexOfThisSubtask(); - } - - @Override - public void setup( - StreamTask containingTask, - StreamConfig config, - Output> output) { - super.setup(containingTask, config, output); - this.toCoordinator = containingTask.getEnvironment().getOperatorCoordinatorEventGateway(); - routes = - routingRules.stream() - .map( - rule -> { - String tableInclusions = rule.sourceTable; - Selectors selectors = - new Selectors.SelectorsBuilder() - .includeTables(tableInclusions) - .build(); - return new Tuple3<>( - selectors, rule.sinkTable, rule.replaceSymbol); - }) - .collect(Collectors.toList()); - schemaEvolutionClient = new SchemaEvolutionClient(toCoordinator, getOperatorID()); - evolvedSchema = - CacheBuilder.newBuilder() - .expireAfterAccess(CACHE_EXPIRE_DURATION) - .build( - new CacheLoader() { - @Override - public Schema load(TableId tableId) { - return getLatestEvolvedSchema(tableId); - } - }); - originalSchema = - CacheBuilder.newBuilder() - .expireAfterAccess(CACHE_EXPIRE_DURATION) - .build( - new CacheLoader() { - @Override - public Schema load(TableId tableId) throws Exception { - return getLatestOriginalSchema(tableId); - } - }); - schemaDivergesMap = - CacheBuilder.newBuilder() - .expireAfterAccess(CACHE_EXPIRE_DURATION) - .build( - new CacheLoader() { - @Override - public Boolean load(TableId tableId) throws Exception { - return checkSchemaDiverges(tableId); - } - }); - tableIdMappingCache = - CacheBuilder.newBuilder() - .expireAfterAccess(CACHE_EXPIRE_DURATION) - .build( - new CacheLoader>() { - @Override - public List load(TableId tableId) { - return getRoutedTables(tableId); - } - }); - } - - /** - * This method is guaranteed to not be called concurrently with other methods of the 
operator. - */ - @Override - public void processElement(StreamRecord streamRecord) - throws InterruptedException, TimeoutException, ExecutionException { - Event event = streamRecord.getValue(); - if (event instanceof SchemaChangeEvent) { - processSchemaChangeEvents((SchemaChangeEvent) event); - } else if (event instanceof DataChangeEvent) { - processDataChangeEvents(streamRecord, (DataChangeEvent) event); - } else { - throw new RuntimeException("Unknown event type in Stream record: " + event); - } - } - - private void processSchemaChangeEvents(SchemaChangeEvent event) - throws InterruptedException, TimeoutException, ExecutionException { - TableId tableId = event.tableId(); - LOG.info( - "{}> Table {} received SchemaChangeEvent {} and start to be blocked.", - subTaskId, - tableId, - event); - handleSchemaChangeEvent(tableId, event); - - if (event instanceof DropTableEvent) { - // Update caches unless event is a Drop table event. In that case, no schema will be - // available / necessary. - return; - } - - originalSchema.put(tableId, getLatestOriginalSchema(tableId)); - schemaDivergesMap.put(tableId, checkSchemaDiverges(tableId)); - - List optionalRoutedTable = getRoutedTables(tableId); - if (!optionalRoutedTable.isEmpty()) { - tableIdMappingCache - .get(tableId) - .forEach(routed -> evolvedSchema.put(routed, getLatestEvolvedSchema(routed))); - } else { - evolvedSchema.put(tableId, getLatestEvolvedSchema(tableId)); - } - } - - private void processDataChangeEvents(StreamRecord streamRecord, DataChangeEvent event) { - TableId tableId = event.tableId(); - List optionalRoutedTable = getRoutedTables(tableId); - if (!optionalRoutedTable.isEmpty()) { - optionalRoutedTable.forEach( - evolvedTableId -> { - output.collect( - new StreamRecord<>( - normalizeSchemaChangeEvents(event, evolvedTableId, false))); - }); - } else if (Boolean.FALSE.equals(schemaDivergesMap.getIfPresent(tableId))) { - output.collect(new StreamRecord<>(normalizeSchemaChangeEvents(event, true))); - } else { - output.collect(streamRecord); - } - } - - private DataChangeEvent normalizeSchemaChangeEvents( - DataChangeEvent event, boolean tolerantMode) { - return normalizeSchemaChangeEvents(event, event.tableId(), tolerantMode); - } - - private DataChangeEvent normalizeSchemaChangeEvents( - DataChangeEvent event, TableId renamedTableId, boolean tolerantMode) { - try { - Schema originalSchema = this.originalSchema.get(event.tableId()); - Schema evolvedTableSchema = evolvedSchema.get(renamedTableId); - if (originalSchema.equals(evolvedTableSchema)) { - return ChangeEventUtils.recreateDataChangeEvent(event, renamedTableId); - } - switch (event.op()) { - case INSERT: - return DataChangeEvent.insertEvent( - renamedTableId, - regenerateRecordData( - event.after(), - originalSchema, - evolvedTableSchema, - tolerantMode), - event.meta()); - case UPDATE: - return DataChangeEvent.updateEvent( - renamedTableId, - regenerateRecordData( - event.before(), - originalSchema, - evolvedTableSchema, - tolerantMode), - regenerateRecordData( - event.after(), - originalSchema, - evolvedTableSchema, - tolerantMode), - event.meta()); - case DELETE: - return DataChangeEvent.deleteEvent( - renamedTableId, - regenerateRecordData( - event.before(), - originalSchema, - evolvedTableSchema, - tolerantMode), - event.meta()); - case REPLACE: - return DataChangeEvent.replaceEvent( - renamedTableId, - regenerateRecordData( - event.after(), - originalSchema, - evolvedTableSchema, - tolerantMode), - event.meta()); - default: - throw new IllegalArgumentException( - 
String.format("Unrecognized operation type \"%s\"", event.op())); - } - } catch (Exception e) { - throw new IllegalStateException("Unable to fill null for empty columns", e); - } - } - - private RecordData regenerateRecordData( - RecordData recordData, - Schema originalSchema, - Schema routedTableSchema, - boolean tolerantMode) { - // Regenerate record data - List fieldGetters = new ArrayList<>(); - for (Column column : routedTableSchema.getColumns()) { - String columnName = column.getName(); - int columnIndex = originalSchema.getColumnNames().indexOf(columnName); - if (columnIndex == -1) { - fieldGetters.add(new NullFieldGetter()); - } else { - RecordData.FieldGetter fieldGetter = - RecordData.createFieldGetter( - originalSchema.getColumn(columnName).get().getType(), columnIndex); - // Check type compatibility, ignoring nullability - if (originalSchema - .getColumn(columnName) - .get() - .getType() - .nullable() - .equals(column.getType().nullable())) { - fieldGetters.add(fieldGetter); - } else { - fieldGetters.add( - new TypeCoercionFieldGetter( - originalSchema.getColumn(columnName).get().getType(), - column.getType(), - fieldGetter, - tolerantMode, - timezone)); - } - } - } - BinaryRecordDataGenerator recordDataGenerator = - new BinaryRecordDataGenerator( - routedTableSchema.getColumnDataTypes().toArray(new DataType[0])); - return recordDataGenerator.generate( - fieldGetters.stream() - .map(fieldGetter -> fieldGetter.getFieldOrNull(recordData)) - .toArray()); - } - - private List getRoutedTables(TableId originalTableId) { - return routes.stream() - .filter(route -> route.f0.isMatch(originalTableId)) - .map(route -> resolveReplacement(originalTableId, route)) - .collect(Collectors.toList()); - } - - private TableId resolveReplacement( - TableId originalTable, Tuple3 route) { - if (route.f2 != null) { - return TableId.parse(route.f1.replace(route.f2, originalTable.getTableName())); - } - return TableId.parse(route.f1); - } - - @VisibleForTesting - protected int getCurrentTimestamp() { - return (int) Instant.now().getEpochSecond(); - } - - private void handleSchemaChangeEvent(TableId tableId, SchemaChangeEvent schemaChangeEvent) - throws InterruptedException, TimeoutException { - - if (schemaChangeBehavior == SchemaChangeBehavior.EXCEPTION - && schemaChangeEvent.getType() != SchemaChangeEventType.CREATE_TABLE) { - // CreateTableEvent should be applied even in EXCEPTION mode - throw new RuntimeException( - String.format( - "Refused to apply schema change event %s in EXCEPTION mode.", - schemaChangeEvent)); - } - - long nonce = - NonceUtils.generateNonce( - getCurrentTimestamp(), subTaskId, tableId, schemaChangeEvent); - - LOG.info("{}> Sending the FlushEvent for table {} (nonce: {}).", subTaskId, tableId, nonce); - output.collect(new StreamRecord<>(new FlushEvent(tableId, nonce))); - - // The request will block if another schema change event is being handled - SchemaChangeResponse response = requestSchemaChange(tableId, schemaChangeEvent, nonce); - if (response.isAccepted()) { - List expectedSchemaChangeEvents = response.getSchemaChangeEvents(); - schemaOperatorMetrics.increaseSchemaChangeEvents(expectedSchemaChangeEvents.size()); - - // The request will block until flushing finished in each sink writer - SchemaChangeResultResponse schemaEvolveResponse = requestSchemaChangeResult(nonce); - List finishedSchemaChangeEvents = - schemaEvolveResponse.getFinishedSchemaChangeEvents(); - - // Update evolved schema changes based on apply results - finishedSchemaChangeEvents.forEach(e -> 
output.collect(new StreamRecord<>(e))); - } else if (response.isDuplicate()) { - LOG.info( - "{}> Schema change event {} has been handled in another subTask already.", - subTaskId, - schemaChangeEvent); - } else if (response.isIgnored()) { - LOG.info( - "{}> Schema change event {} has been ignored. No schema evolution needed.", - subTaskId, - schemaChangeEvent); - } else { - throw new IllegalStateException("Unexpected response status " + response); - } - } - - private SchemaChangeResponse requestSchemaChange( - TableId tableId, SchemaChangeEvent schemaChangeEvent, long nonce) - throws InterruptedException, TimeoutException { - long schemaEvolveTimeOutMillis = System.currentTimeMillis() + rpcTimeOutInMillis; - while (true) { - SchemaChangeResponse response = - sendRequestToCoordinator( - new SchemaChangeRequest(tableId, schemaChangeEvent, subTaskId, nonce)); - if (System.currentTimeMillis() < schemaEvolveTimeOutMillis) { - if (response.isRegistryBusy()) { - LOG.info( - "{}> Schema Registry is busy now, waiting for next request...", - subTaskId); - Thread.sleep(1000); - } else if (response.isWaitingForFlush()) { - LOG.info( - "{}> Schema change event (with nonce {}) has not collected enough flush success events from writers, waiting...", - subTaskId, - nonce); - Thread.sleep(1000); - } else { - return response; - } - } else { - throw new TimeoutException("TimeOut when requesting schema change"); - } - } - } - - private SchemaChangeResultResponse requestSchemaChangeResult(long nonce) - throws InterruptedException, TimeoutException { - CoordinationResponse coordinationResponse = - sendRequestToCoordinator(new SchemaChangeResultRequest(nonce)); - long nextRpcTimeOutMillis = System.currentTimeMillis() + rpcTimeOutInMillis; - while (coordinationResponse instanceof SchemaChangeProcessingResponse) { - if (System.currentTimeMillis() < nextRpcTimeOutMillis) { - Thread.sleep(1000); - coordinationResponse = - sendRequestToCoordinator(new SchemaChangeResultRequest(nonce)); - } else { - throw new TimeoutException("TimeOut when requesting release upstream"); - } - } - return ((SchemaChangeResultResponse) coordinationResponse); - } - - private - RESPONSE sendRequestToCoordinator(REQUEST request) { - try { - CompletableFuture responseFuture = - toCoordinator.sendRequestToCoordinator( - getOperatorID(), new SerializedValue<>(request)); - return CoordinationResponseUtils.unwrap(responseFuture.get()); - } catch (Exception e) { - throw new IllegalStateException( - "Failed to send request to coordinator: " + request.toString(), e); - } - } - - private Schema getLatestEvolvedSchema(TableId tableId) { - try { - Optional optionalSchema = schemaEvolutionClient.getLatestEvolvedSchema(tableId); - if (!optionalSchema.isPresent()) { - throw new IllegalStateException( - String.format("Schema doesn't exist for table \"%s\"", tableId)); - } - return optionalSchema.get(); - } catch (Exception e) { - throw new IllegalStateException( - String.format("Unable to get latest schema for table \"%s\"", tableId), e); - } - } - - private Schema getLatestOriginalSchema(TableId tableId) { - try { - Optional optionalSchema = - schemaEvolutionClient.getLatestOriginalSchema(tableId); - if (!optionalSchema.isPresent()) { - throw new IllegalStateException( - String.format("Schema doesn't exist for table \"%s\"", tableId)); - } - return optionalSchema.get(); - } catch (Exception e) { - throw new IllegalStateException( - String.format("Unable to get latest schema for table \"%s\"", tableId), e); - } - } - - private Boolean 
checkSchemaDiverges(TableId tableId) { - try { - return getLatestEvolvedSchema(tableId).equals(getLatestOriginalSchema(tableId)); - } catch (IllegalStateException e) { - // schema fetch failed, regard it as diverged - return true; - } - } - - private static class NullFieldGetter implements RecordData.FieldGetter { - @Nullable - @Override - public Object getFieldOrNull(RecordData recordData) { - return null; - } - } - - private static class TypeCoercionFieldGetter implements RecordData.FieldGetter { - private final DataType originalType; - private final DataType destinationType; - private final RecordData.FieldGetter originalFieldGetter; - private final boolean tolerantMode; - private final String timezone; - - public TypeCoercionFieldGetter( - DataType originalType, - DataType destinationType, - RecordData.FieldGetter originalFieldGetter, - boolean tolerantMode, - String timezone) { - this.originalType = originalType; - this.destinationType = destinationType; - this.originalFieldGetter = originalFieldGetter; - this.tolerantMode = tolerantMode; - this.timezone = timezone; - } - - private Object fail(IllegalArgumentException e) throws IllegalArgumentException { - if (tolerantMode) { - return null; - } - throw e; - } - - @Nullable - @Override - public Object getFieldOrNull(RecordData recordData) { - Object originalField = originalFieldGetter.getFieldOrNull(recordData); - if (originalField == null) { - return null; - } - if (destinationType.is(DataTypeRoot.BIGINT)) { - if (originalField instanceof Byte) { - // TINYINT - return ((Byte) originalField).longValue(); - } else if (originalField instanceof Short) { - // SMALLINT - return ((Short) originalField).longValue(); - } else if (originalField instanceof Integer) { - // INT - return ((Integer) originalField).longValue(); - } else if (originalField instanceof Long) { - // BIGINT - return originalField; - } else { - return fail( - new IllegalArgumentException( - String.format( - "Cannot fit type \"%s\" into a BIGINT column. " - + "Currently only TINYINT / SMALLINT / INT / LONG can be accepted by a BIGINT column", - originalField.getClass()))); - } - } else if (destinationType instanceof DecimalType) { - DecimalType decimalType = (DecimalType) destinationType; - BigDecimal decimalValue; - if (originalField instanceof Byte) { - decimalValue = BigDecimal.valueOf(((Byte) originalField).longValue(), 0); - } else if (originalField instanceof Short) { - decimalValue = BigDecimal.valueOf(((Short) originalField).longValue(), 0); - } else if (originalField instanceof Integer) { - decimalValue = BigDecimal.valueOf(((Integer) originalField).longValue(), 0); - } else if (originalField instanceof Long) { - decimalValue = BigDecimal.valueOf((Long) originalField, 0); - } else if (originalField instanceof DecimalData) { - decimalValue = ((DecimalData) originalField).toBigDecimal(); - } else { - return fail( - new IllegalArgumentException( - String.format( - "Cannot fit type \"%s\" into a DECIMAL column. " - + "Currently only BYTE / SHORT / INT / LONG / DECIMAL can be accepted by a DECIMAL column", - originalField.getClass()))); - } - return decimalValue != null - ? DecimalData.fromBigDecimal( - decimalValue, decimalType.getPrecision(), decimalType.getScale()) - : null; - } else if (destinationType.is(DataTypeFamily.APPROXIMATE_NUMERIC)) { - if (originalField instanceof Float) { - // FLOAT - return ((Float) originalField).doubleValue(); - } else { - return fail( - new IllegalArgumentException( - String.format( - "Cannot fit type \"%s\" into a DOUBLE column. 
" - + "Currently only FLOAT can be accepted by a DOUBLE column", - originalField.getClass()))); - } - } else if (destinationType.is(DataTypeRoot.VARCHAR)) { - if (originalField instanceof StringData) { - return originalField; - } else { - return fail( - new IllegalArgumentException( - String.format( - "Cannot fit type \"%s\" into a STRING column. " - + "Currently only CHAR / VARCHAR can be accepted by a STRING column", - originalField.getClass()))); - } - } else if (destinationType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) - && originalType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE)) { - // For now, TimestampData / ZonedTimestampData / LocalZonedTimestampData has no - // difference in its internal representation, so there's no need to do any precision - // conversion. - return originalField; - } else if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE) - && originalType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) { - return originalField; - } else if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE) - && originalType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)) { - return originalField; - } else if (destinationType.is(DataTypeFamily.TIMESTAMP) - && originalType.is(DataTypeFamily.TIMESTAMP)) { - return castToTimestamp(originalField, timezone); - } else { - return fail( - new IllegalArgumentException( - String.format( - "Column type \"%s\" doesn't support type coercion", - destinationType))); - } - } - } - - @Override - public void snapshotState(StateSnapshotContext context) throws Exception { - // Needless to do anything, since AbstractStreamOperator#snapshotState and #processElement - // is guaranteed not to be mixed together. - } - - private static TimestampData castToTimestamp(Object object, String timezone) { - if (object == null) { - return null; - } - if (object instanceof LocalZonedTimestampData) { - return TimestampData.fromLocalDateTime( - LocalDateTime.ofInstant( - ((LocalZonedTimestampData) object).toInstant(), ZoneId.of(timezone))); - } else if (object instanceof ZonedTimestampData) { - return TimestampData.fromLocalDateTime( - LocalDateTime.ofInstant( - ((ZonedTimestampData) object).toInstant(), ZoneId.of(timezone))); - } else { - throw new IllegalArgumentException( - String.format( - "Unable to implicitly coerce object `%s` as a TIMESTAMP.", object)); - } - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/CoordinatorExecutorThreadFactory.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/CoordinatorExecutorThreadFactory.java new file mode 100644 index 00000000000..57949566719 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/CoordinatorExecutorThreadFactory.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.util.FatalExitExceptionHandler; + +import java.util.concurrent.ThreadFactory; + +/** A thread factory class that provides some helper methods. */ +public class CoordinatorExecutorThreadFactory implements ThreadFactory { + + private final String coordinatorThreadName; + private final ClassLoader cl; + private final Thread.UncaughtExceptionHandler errorHandler; + + private Thread t; + + public CoordinatorExecutorThreadFactory( + final String coordinatorThreadName, final ClassLoader contextClassLoader) { + this(coordinatorThreadName, contextClassLoader, FatalExitExceptionHandler.INSTANCE); + } + + public CoordinatorExecutorThreadFactory( + final String coordinatorThreadName, + final ClassLoader contextClassLoader, + final Thread.UncaughtExceptionHandler errorHandler) { + this.coordinatorThreadName = coordinatorThreadName; + this.cl = contextClassLoader; + this.errorHandler = errorHandler; + } + + @Override + public synchronized Thread newThread(Runnable r) { + if (t != null) { + throw new Error( + "This indicates that a fatal error has happened and caused the " + + "coordinator executor thread to exit. Check the earlier logs" + + "to see the root cause of the problem."); + } + t = new Thread(r, coordinatorThreadName); + t.setContextClassLoader(cl); + t.setUncaughtExceptionHandler(errorHandler); + return t; + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivator.java new file mode 100755 index 00000000000..308d97be252 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivator.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
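The CoordinatorExecutorThreadFactory added above is intended for single-threaded executors: newThread() hands out exactly one thread and raises an Error on any second request, so an executor whose thread died from a fatal error cannot silently respawn it. A usage sketch under that assumption (the thread name is illustrative, not taken from the patch; uses java.util.concurrent.Executors):

    // Builds a one-thread coordinator executor backed by the factory above.
    CoordinatorExecutorThreadFactory threadFactory =
            new CoordinatorExecutorThreadFactory(
                    "schema-coordinator-example", // illustrative thread name
                    Thread.currentThread().getContextClassLoader());
    ExecutorService coordinatorExecutor = Executors.newSingleThreadExecutor(threadFactory);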
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.cdc.common.data.RecordData; +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.DropColumnEvent; +import org.apache.flink.cdc.common.event.RenameColumnEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEventWithPreSchema; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.common.types.DataType; +import org.apache.flink.cdc.common.utils.SchemaMergingUtils; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; + +import org.apache.flink.shaded.guava31.com.google.common.collect.Table; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** A utility class to derive how evolved schemas should change. */ +public class SchemaDerivator { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaDerivator.class); + + /** Get affected evolved table IDs based on changed upstream tables. */ + public static Set getAffectedEvolvedTables( + final TableIdRouter tableIdRouter, final Set changedUpstreamTables) { + return changedUpstreamTables.stream() + .flatMap(cut -> tableIdRouter.route(cut).stream()) + .collect(Collectors.toSet()); + } + + /** For an evolved table ID, reverse lookup all upstream tables that it depends on. */ + public static Set reverseLookupDependingUpstreamTables( + final TableIdRouter tableIdRouter, + final TableId evolvedTableId, + final Set upstreamSchemaTables) { + return upstreamSchemaTables.stream() + .filter(kut -> tableIdRouter.route(kut).contains(evolvedTableId)) + .collect(Collectors.toSet()); + } + + /** For an evolved table ID, reverse lookup all upstream tables that it depends on. */ + public static Set reverseLookupDependingUpstreamTables( + final TableIdRouter tableIdRouter, + final TableId evolvedTableId, + final Table upstreamSchemaTable) { + return upstreamSchemaTable.rowKeySet().stream() + .filter(kut -> tableIdRouter.route(kut).contains(evolvedTableId)) + .collect(Collectors.toSet()); + } + + /** For an evolved table ID, reverse lookup all upstream schemas that needs to be fit in. */ + public static Set reverseLookupDependingUpstreamSchemas( + final TableIdRouter tableIdRouter, + final TableId evolvedTableId, + final SchemaManager schemaManager) { + return reverseLookupDependingUpstreamTables( + tableIdRouter, evolvedTableId, schemaManager.getAllOriginalTables()) + .stream() + .map(utid -> schemaManager.getLatestOriginalSchema(utid).get()) + .collect(Collectors.toSet()); + } + + /** For an evolved table ID, reverse lookup all upstream schemas that needs to be fit in. 
*/ + public static Set reverseLookupDependingUpstreamSchemas( + final TableIdRouter tableIdRouter, + final TableId evolvedTableId, + final Table upstreamSchemaTable) { + return reverseLookupDependingUpstreamTables( + tableIdRouter, evolvedTableId, upstreamSchemaTable) + .stream() + .flatMap(utid -> upstreamSchemaTable.row(utid).values().stream()) + .collect(Collectors.toSet()); + } + + /** + * Rewrite {@link SchemaChangeEvent}s by current {@link SchemaChangeBehavior} and include / + * exclude them by fine-grained schema change event configurations. + */ + public static List normalizeSchemaChangeEvents( + Schema oldSchema, + List schemaChangeEvents, + SchemaChangeBehavior schemaChangeBehavior, + MetadataApplier metadataApplier) { + List rewrittenSchemaChangeEvents = + rewriteSchemaChangeEvents(oldSchema, schemaChangeEvents, schemaChangeBehavior); + rewrittenSchemaChangeEvents.forEach( + evt -> { + if (evt instanceof SchemaChangeEventWithPreSchema) { + SchemaChangeEventWithPreSchema eventNeedsPreSchema = + (SchemaChangeEventWithPreSchema) evt; + if (!eventNeedsPreSchema.hasPreSchema()) { + eventNeedsPreSchema.fillPreSchema(oldSchema); + } + } + }); + + List finalSchemaChangeEvents = new ArrayList<>(); + for (SchemaChangeEvent schemaChangeEvent : rewrittenSchemaChangeEvents) { + if (metadataApplier.acceptsSchemaEvolutionType(schemaChangeEvent.getType())) { + finalSchemaChangeEvents.add(schemaChangeEvent); + } else { + LOG.info("Ignored schema change {}.", schemaChangeEvent); + } + } + return finalSchemaChangeEvents; + } + + private static List rewriteSchemaChangeEvents( + Schema oldSchema, + List schemaChangeEvents, + SchemaChangeBehavior schemaChangeBehavior) { + switch (schemaChangeBehavior) { + case EVOLVE: + case TRY_EVOLVE: + case EXCEPTION: + return schemaChangeEvents; + case LENIENT: + return schemaChangeEvents.stream() + .flatMap(evt -> lenientizeSchemaChangeEvent(oldSchema, evt)) + .collect(Collectors.toList()); + case IGNORE: + return schemaChangeEvents.stream() + .filter(e -> e instanceof CreateTableEvent) + .collect(Collectors.toList()); + default: + throw new IllegalArgumentException( + "Unexpected schema change behavior: " + schemaChangeBehavior); + } + } + + private static Stream lenientizeSchemaChangeEvent( + Schema oldSchema, SchemaChangeEvent schemaChangeEvent) { + TableId tableId = schemaChangeEvent.tableId(); + switch (schemaChangeEvent.getType()) { + case ADD_COLUMN: + return lenientizeAddColumnEvent((AddColumnEvent) schemaChangeEvent, tableId); + case DROP_COLUMN: + return lenientizeDropColumnEvent( + oldSchema, (DropColumnEvent) schemaChangeEvent, tableId); + case RENAME_COLUMN: + return lenientizeRenameColumnEvent( + oldSchema, (RenameColumnEvent) schemaChangeEvent, tableId); + default: + return Stream.of(schemaChangeEvent); + } + } + + private static Stream lenientizeRenameColumnEvent( + Schema oldSchema, RenameColumnEvent schemaChangeEvent, TableId tableId) { + List appendColumns = new ArrayList<>(); + Map convertNullableColumns = new HashMap<>(); + schemaChangeEvent + .getNameMapping() + .forEach( + (oldColName, newColName) -> { + Column column = + oldSchema + .getColumn(oldColName) + .orElseThrow( + () -> + new IllegalArgumentException( + "Non-existed column " + + oldColName + + " in evolved schema.")); + if (!column.getType().isNullable()) { + // It's a not-nullable column, we need to cast it to + // nullable first + convertNullableColumns.put(oldColName, column.getType().nullable()); + } + appendColumns.add( + new AddColumnEvent.ColumnWithPosition( + 
column.copy(newColName) + .copy(column.getType().nullable()))); + }); + + List events = new ArrayList<>(); + events.add(new AddColumnEvent(tableId, appendColumns)); + if (!convertNullableColumns.isEmpty()) { + events.add(new AlterColumnTypeEvent(tableId, convertNullableColumns)); + } + return events.stream(); + } + + private static Stream lenientizeDropColumnEvent( + Schema oldSchema, DropColumnEvent schemaChangeEvent, TableId tableId) { + Map convertNullableColumns = + schemaChangeEvent.getDroppedColumnNames().stream() + .map(oldSchema::getColumn) + .flatMap(e -> e.map(Stream::of).orElse(Stream.empty())) + .filter(col -> !col.getType().isNullable()) + .collect( + Collectors.toMap( + Column::getName, column -> column.getType().nullable())); + + if (convertNullableColumns.isEmpty()) { + return Stream.empty(); + } else { + return Stream.of(new AlterColumnTypeEvent(tableId, convertNullableColumns)); + } + } + + private static Stream lenientizeAddColumnEvent( + AddColumnEvent schemaChangeEvent, TableId tableId) { + return Stream.of( + new AddColumnEvent( + tableId, + schemaChangeEvent.getAddedColumns().stream() + .map( + col -> + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + col.getAddColumn().getName(), + col.getAddColumn() + .getType() + .nullable(), + col.getAddColumn().getComment(), + col.getAddColumn() + .getDefaultValueExpression()))) + .collect(Collectors.toList()))); + } + + /** Coerce a {@link DataChangeEvent} from upstream to expected downstream schema. */ + public static Optional coerceDataRecord( + String timezone, + DataChangeEvent dataChangeEvent, + Schema upstreamSchema, + @Nullable Schema evolvedSchema) { + if (evolvedSchema == null) { + // Sink does not recognize this tableId, might have been dropped, just ignore it. + return Optional.empty(); + } + + if (upstreamSchema.equals(evolvedSchema)) { + // If there's no schema difference, just return the original event. + return Optional.of(dataChangeEvent); + } + + // TODO: We may cache these accessors later for better performance. 
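To make the lenientize* helpers above concrete: under LENIENT behavior a rename of a NOT NULL column is never forwarded as a rename. Instead the new column name is appended as a nullable column and the old column is relaxed to nullable, so sinks that cannot rename or drop columns keep accepting both old and new rows. A hedged sketch of the two events produced for renaming column name to full_name on table mydb.users (the table id, column names, and DataTypes.STRING() type are illustrative; the event constructors are the ones used above):

    TableId table = TableId.parse("mydb.users");

    // 1) Append the new column name, forced nullable (earlier rows carry no value for it).
    AddColumnEvent appendNewName =
            new AddColumnEvent(
                    table,
                    Collections.singletonList(
                            new AddColumnEvent.ColumnWithPosition(
                                    Column.physicalColumn(
                                            "full_name", DataTypes.STRING(), null, null))));

    // 2) Relax the old column to a nullable type instead of dropping it.
    AlterColumnTypeEvent relaxOldName =
            new AlterColumnTypeEvent(
                    table, Collections.singletonMap("name", DataTypes.STRING()));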
+ List upstreamSchemaReader = + SchemaUtils.createFieldGetters(upstreamSchema); + BinaryRecordDataGenerator evolvedSchemaWriter = + new BinaryRecordDataGenerator( + evolvedSchema.getColumnDataTypes().toArray(new DataType[0])); + + // Coerce binary data records + if (dataChangeEvent.before() != null) { + List upstreamFields = + SchemaUtils.restoreOriginalData(dataChangeEvent.before(), upstreamSchemaReader); + Object[] coercedRow = + SchemaMergingUtils.coerceRow( + timezone, evolvedSchema, upstreamSchema, upstreamFields); + + dataChangeEvent = + DataChangeEvent.projectBefore( + dataChangeEvent, evolvedSchemaWriter.generate(coercedRow)); + } + + if (dataChangeEvent.after() != null) { + List upstreamFields = + SchemaUtils.restoreOriginalData(dataChangeEvent.after(), upstreamSchemaReader); + Object[] coercedRow = + SchemaMergingUtils.coerceRow( + timezone, evolvedSchema, upstreamSchema, upstreamFields); + + dataChangeEvent = + DataChangeEvent.projectAfter( + dataChangeEvent, evolvedSchemaWriter.generate(coercedRow)); + } + + return Optional.of(dataChangeEvent); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManager.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManager.java similarity index 70% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManager.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManager.java index 60e70b7ed89..6d36f6eb7ba 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManager.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManager.java @@ -15,18 +15,15 @@ * limitations under the License. 
*/ -package org.apache.flink.cdc.runtime.operators.schema.coordinator; +package org.apache.flink.cdc.runtime.operators.schema.common; import org.apache.flink.cdc.common.annotation.Internal; -import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; import org.apache.flink.cdc.common.event.CreateTableEvent; import org.apache.flink.cdc.common.event.SchemaChangeEvent; import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.event.visitor.SchemaChangeEventVisitor; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; -import org.apache.flink.cdc.common.schema.Column; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.types.DataType; import org.apache.flink.cdc.common.utils.SchemaUtils; import org.apache.flink.cdc.runtime.serializer.TableIdSerializer; import org.apache.flink.cdc.runtime.serializer.schema.SchemaSerializer; @@ -34,20 +31,17 @@ import org.apache.flink.core.memory.DataInputViewStreamWrapper; import org.apache.flink.core.memory.DataOutputViewStreamWrapper; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -59,7 +53,6 @@ */ @Internal public class SchemaManager { - private static final Logger LOG = LoggerFactory.getLogger(SchemaManager.class); private static final int INITIAL_SCHEMA_VERSION = 0; private static final int VERSIONS_TO_KEEP = 3; private final SchemaChangeBehavior behavior; @@ -79,12 +72,6 @@ public SchemaManager() { behavior = SchemaChangeBehavior.EVOLVE; } - public SchemaManager(SchemaChangeBehavior behavior) { - evolvedSchemas = new HashMap<>(); - originalSchemas = new HashMap<>(); - this.behavior = behavior; - } - public SchemaManager( Map> originalSchemas, Map> evolvedSchemas, @@ -98,97 +85,6 @@ public SchemaChangeBehavior getBehavior() { return behavior; } - /** - * This function checks if the given schema change event has been applied already. If so, it - * will be ignored to avoid sending duplicate evolved schema change events to sink metadata - * applier. 
- */ - public final boolean isOriginalSchemaChangeEventRedundant(SchemaChangeEvent event) { - TableId tableId = event.tableId(); - Optional latestSchema = getLatestOriginalSchema(tableId); - return Boolean.TRUE.equals( - SchemaChangeEventVisitor.visit( - event, - addColumnEvent -> { - // It has not been applied if schema does not even exist - if (!latestSchema.isPresent()) { - return false; - } - List existedColumns = latestSchema.get().getColumns(); - - // It has been applied only if all columns are present in existedColumns - for (AddColumnEvent.ColumnWithPosition column : - addColumnEvent.getAddedColumns()) { - if (!existedColumns.contains(column.getAddColumn())) { - return false; - } - } - return true; - }, - alterColumnTypeEvent -> { - // It has not been applied if schema does not even exist - if (!latestSchema.isPresent()) { - return false; - } - Schema schema = latestSchema.get(); - - // It has been applied only if all column types are set as expected - for (Map.Entry entry : - alterColumnTypeEvent.getTypeMapping().entrySet()) { - if (!schema.getColumn(entry.getKey()).isPresent() - || !schema.getColumn(entry.getKey()) - .get() - .getType() - .equals(entry.getValue())) { - return false; - } - } - return true; - }, - createTableEvent -> { - // It has been applied if such table already exists - return latestSchema.isPresent(); - }, - dropColumnEvent -> { - // It has not been applied if schema does not even exist - if (!latestSchema.isPresent()) { - return false; - } - List existedColumnNames = latestSchema.get().getColumnNames(); - - // It has been applied only if corresponding column types do not exist - return dropColumnEvent.getDroppedColumnNames().stream() - .noneMatch(existedColumnNames::contains); - }, - dropTableEvent -> { - // It has been applied if such table does not exist - return !latestSchema.isPresent(); - }, - renameColumnEvent -> { - // It has been applied if such table already exists - if (!latestSchema.isPresent()) { - return false; - } - List existedColumnNames = latestSchema.get().getColumnNames(); - - // It has been applied only if all previous names do not exist, and all - // new names already exist - for (Map.Entry entry : - renameColumnEvent.getNameMapping().entrySet()) { - if (existedColumnNames.contains(entry.getKey()) - || !existedColumnNames.contains(entry.getValue())) { - return false; - } - } - return true; - }, - truncateTableEvent -> { - // We have no way to ensure if a TruncateTableEvent has been applied - // before. Just assume it's not. - return false; - })); - } - public final boolean schemaExists( Map> schemaMap, TableId tableId) { return schemaMap.containsKey(tableId) && !schemaMap.get(tableId).isEmpty(); @@ -202,6 +98,10 @@ public final boolean evolvedSchemaExists(TableId tableId) { return schemaExists(evolvedSchemas, tableId); } + public final Set getAllOriginalTables() { + return originalSchemas.keySet(); + } + /** Get the latest evolved schema of the specified table. 
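SchemaManager above keeps, per table, a SortedMap from schema version to Schema and retains only a bounded number of recent versions (VERSIONS_TO_KEEP); getLatestEvolvedSchema / getLatestOriginalSchema simply read the highest key. A minimal, dependency-free sketch of that bookkeeping pattern (not the class itself; uses java.util.{HashMap, Map, Optional, SortedMap, TreeMap}):

    class VersionedSchemaStoreSketch<K, S> {
        private static final int VERSIONS_TO_KEEP = 3;
        private final Map<K, SortedMap<Integer, S>> schemas = new HashMap<>();

        /** Registers a schema under the next version and trims versions past the retention limit. */
        void register(K table, S schema) {
            SortedMap<Integer, S> versions = schemas.computeIfAbsent(table, k -> new TreeMap<>());
            int nextVersion = versions.isEmpty() ? 0 : versions.lastKey() + 1;
            versions.put(nextVersion, schema);
            while (versions.size() > VERSIONS_TO_KEEP) {
                versions.remove(versions.firstKey());
            }
        }

        /** Latest registered schema, if any. */
        Optional<S> latest(K table) {
            SortedMap<Integer, S> versions = schemas.get(table);
            return versions == null || versions.isEmpty()
                    ? Optional.empty()
                    : Optional.of(versions.get(versions.lastKey()));
        }
    }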
*/ public Optional getLatestEvolvedSchema(TableId tableId) { return getLatestSchemaVersion(evolvedSchemas, tableId) @@ -254,8 +154,6 @@ public void applyOriginalSchemaChange(SchemaChangeEvent schemaChangeEvent) { optionalSchema.isPresent(), "Unable to apply SchemaChangeEvent for table \"%s\" without existing schema", schemaChangeEvent.tableId()); - - LOG.info("Handling original schema change event: {}", schemaChangeEvent); registerNewSchema( originalSchemas, schemaChangeEvent.tableId(), @@ -273,8 +171,6 @@ public void applyEvolvedSchemaChange(SchemaChangeEvent schemaChangeEvent) { optionalSchema.isPresent(), "Unable to apply SchemaChangeEvent for table \"%s\" without existing schema", schemaChangeEvent.tableId()); - - LOG.info("Handling evolved schema change event: {}", schemaChangeEvent); registerNewSchema( evolvedSchemas, schemaChangeEvent.tableId(), @@ -316,11 +212,9 @@ private Optional getLatestSchemaVersion( private void handleCreateTableEvent( final Map> schemaMap, CreateTableEvent event) { - checkArgument( - !schemaExists(schemaMap, event.tableId()), - "Unable to apply CreateTableEvent to an existing schema for table \"%s\"", - event.tableId()); - LOG.info("Handling schema change event: {}", event); + // Upstream schemas are transient, and might be sent differently after restarting from + // state. Thus, we should allow upstream CreateTableEvents to emplace existing ones, instead + // of throwing an exception. registerNewSchema(schemaMap, event.tableId(), event.getSchema()); } @@ -342,6 +236,16 @@ private void registerNewSchema( } } + @VisibleForTesting + public void registerNewOriginalSchema(TableId tableId, Schema newSchema) { + registerNewSchema(originalSchemas, tableId, newSchema); + } + + @VisibleForTesting + public void registerNewEvolvedSchema(TableId tableId, Schema newSchema) { + registerNewSchema(evolvedSchemas, tableId, newSchema); + } + /** Serializer for {@link SchemaManager}. */ public static class Serializer implements SimpleVersionedSerializer { @@ -449,4 +353,23 @@ private static Map> deserializeSchemaMap( return tableSchemas; } } + + @Override + public String toString() { + return String.format( + "Schema Manager %s: \n" + + "\toriginal schema map:\n%s\n" + + "\tevolved schema map:\n%s", + hashCode(), schemaMapToString(originalSchemas), schemaMapToString(evolvedSchemas)); + } + + private static String schemaMapToString(Map> schemaMap) { + StringBuilder sb = new StringBuilder(); + for (Map.Entry> entry : schemaMap.entrySet()) { + TableId tableId = entry.getKey(); + SortedMap versionedSchemas = entry.getValue(); + sb.append(String.format("\t\t- table %s: %s\n", tableId, schemaMap)); + } + return sb.toString(); + } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaRegistry.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaRegistry.java new file mode 100644 index 00000000000..6d0377a947e --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaRegistry.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.cdc.common.annotation.Internal; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.FlushSuccessEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.sink.SchemaEvolutionClient; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; +import org.apache.flink.runtime.operators.coordination.CoordinationRequestHandler; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.runtime.operators.coordination.OperatorEvent; +import org.apache.flink.util.ExceptionUtils; +import org.apache.flink.util.FlinkRuntimeException; +import org.apache.flink.util.function.ThrowingRunnable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeoutException; +import java.util.function.BooleanSupplier; + +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.wrap; + +/** + * An abstract centralized schema registry that accepts requests from {@link SchemaEvolutionClient}. + * A legit schema registry should be able to: + * + *
+ * <ul>
+ *   <li>Handle schema retrieval requests from {@link SchemaEvolutionClient}s
+ *   <li>Accept and trace sink writers' registering events
+ *   <li>Snapshot & Restore its state during checkpoints
+ * </ul>
+ *
+ * These abilities are done by overriding abstract methods of {@link SchemaRegistry}. All methods + * will run in given {@link ExecutorService} asynchronously except {@code SchemaRegistry#restore}. + */ +@Internal +public abstract class SchemaRegistry implements OperatorCoordinator, CoordinationRequestHandler { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaRegistry.class); + + // ------------------------- + // Static fields that got bind after construction + // ------------------------- + protected final OperatorCoordinator.Context context; + protected final String operatorName; + protected final ExecutorService coordinatorExecutor; + protected final MetadataApplier metadataApplier; + protected final Duration rpcTimeout; + protected final List routingRules; + + // ------------------------- + // Dynamically initialized transient fields (after coordinator starts) + // ------------------------- + protected transient int currentParallelism; + protected transient Set activeSinkWriters; + protected transient Set flushedSinkWriters; + protected transient Map subtaskGatewayMap; + protected transient Map failedReasons; + protected transient SchemaManager schemaManager; + protected transient TableIdRouter router; + + protected SchemaRegistry( + OperatorCoordinator.Context context, + String operatorName, + ExecutorService coordinatorExecutor, + MetadataApplier metadataApplier, + List routingRules, + Duration rpcTimeout) { + this.context = context; + this.operatorName = operatorName; + this.coordinatorExecutor = coordinatorExecutor; + this.metadataApplier = metadataApplier; + this.routingRules = routingRules; + this.rpcTimeout = rpcTimeout; + } + + // --------------- + // Lifecycle hooks + // --------------- + @Override + public void start() throws Exception { + LOG.info("Starting SchemaRegistry - {}.", operatorName); + this.currentParallelism = context.currentParallelism(); + this.activeSinkWriters = ConcurrentHashMap.newKeySet(); + this.flushedSinkWriters = ConcurrentHashMap.newKeySet(); + this.subtaskGatewayMap = new ConcurrentHashMap<>(); + this.failedReasons = new ConcurrentHashMap<>(); + this.schemaManager = new SchemaManager(); + this.router = new TableIdRouter(routingRules); + } + + @Override + public void close() throws Exception { + LOG.info("Closing SchemaRegistry - {}.", operatorName); + coordinatorExecutor.shutdown(); + } + + // ------------------------------ + // Overridable checkpoint methods + // ------------------------------ + /** Snapshot current schema registry state in byte array form. */ + protected abstract void snapshot(CompletableFuture resultFuture) throws Exception; + + /** Restore schema registry state from byte array. */ + protected abstract void restore(byte[] checkpointData) throws Exception; + + // ------------------------------------ + // Overridable event & request handlers + // ------------------------------------ + + /** Overridable handler for {@link SinkWriterRegisterEvent}s. */ + protected void handleSinkWriterRegisterEvent(SinkWriterRegisterEvent event) throws Exception { + LOG.info("Sink subtask {} already registered.", event.getSubtask()); + activeSinkWriters.add(event.getSubtask()); + } + + /** Overridable handler for {@link FlushSuccessEvent}s. */ + protected void handleFlushSuccessEvent(FlushSuccessEvent event) throws Exception { + LOG.info("Sink subtask {} succeed flushing.", event.getSubtask()); + flushedSinkWriters.add(event.getSubtask()); + } + + /** Overridable handler for {@link GetEvolvedSchemaRequest}s. 
*/ + protected void handleGetEvolvedSchemaRequest( + GetEvolvedSchemaRequest request, CompletableFuture responseFuture) + throws Exception { + LOG.info("Handling evolved schema request: {}", request); + int schemaVersion = request.getSchemaVersion(); + TableId tableId = request.getTableId(); + if (schemaVersion == GetEvolvedSchemaRequest.LATEST_SCHEMA_VERSION) { + responseFuture.complete( + wrap( + new GetEvolvedSchemaResponse( + schemaManager.getLatestEvolvedSchema(tableId).orElse(null)))); + } else { + try { + responseFuture.complete( + wrap( + new GetEvolvedSchemaResponse( + schemaManager.getEvolvedSchema(tableId, schemaVersion)))); + } catch (IllegalArgumentException iae) { + LOG.warn( + "Some client is requesting an non-existed evolved schema for table {} with version {}", + tableId, + schemaVersion); + responseFuture.complete(wrap(new GetEvolvedSchemaResponse(null))); + } + } + } + + /** Overridable handler for {@link GetOriginalSchemaRequest}s. */ + protected void handleGetOriginalSchemaRequest( + GetOriginalSchemaRequest request, + CompletableFuture responseFuture) + throws Exception { + LOG.info("Handling original schema request: {}", request); + int schemaVersion = request.getSchemaVersion(); + TableId tableId = request.getTableId(); + if (schemaVersion == GetOriginalSchemaRequest.LATEST_SCHEMA_VERSION) { + responseFuture.complete( + wrap( + new GetOriginalSchemaResponse( + schemaManager.getLatestOriginalSchema(tableId).orElse(null)))); + } else { + try { + responseFuture.complete( + wrap( + new GetOriginalSchemaResponse( + schemaManager.getOriginalSchema(tableId, schemaVersion)))); + } catch (IllegalArgumentException iae) { + LOG.warn( + "Some client is requesting an non-existed original schema for table {} with version {}", + tableId, + schemaVersion); + responseFuture.complete(wrap(new GetOriginalSchemaResponse(null))); + } + } + } + + /** Coordination handler for customized {@link CoordinationRequest}s. */ + protected abstract void handleCustomCoordinationRequest( + CoordinationRequest request, CompletableFuture responseFuture) + throws Exception; + + /** Last chance to execute codes before job fails globally. 
*/ + protected void handleUnrecoverableError(String taskDescription, Throwable t) { + LOG.error( + "Uncaught exception in the Schema Registry ({}) event loop for {}.", + operatorName, + taskDescription, + t); + LOG.error("\tCurrent schema manager state: {}", schemaManager); + } + + // --------------------------------- + // Event & Request Dispatching Stuff + // --------------------------------- + + @Override + public final CompletableFuture handleCoordinationRequest( + CoordinationRequest request) { + CompletableFuture future = new CompletableFuture<>(); + runInEventLoop( + () -> { + if (request instanceof GetEvolvedSchemaRequest) { + handleGetEvolvedSchemaRequest((GetEvolvedSchemaRequest) request, future); + } else if (request instanceof GetOriginalSchemaRequest) { + handleGetOriginalSchemaRequest((GetOriginalSchemaRequest) request, future); + } else { + handleCustomCoordinationRequest(request, future); + } + }, + "Handling request - %s", + request); + return future; + } + + @Override + public final void handleEventFromOperator( + int subTaskId, int attemptNumber, OperatorEvent event) { + runInEventLoop( + () -> { + if (event instanceof FlushSuccessEvent) { + handleFlushSuccessEvent((FlushSuccessEvent) event); + } else if (event instanceof SinkWriterRegisterEvent) { + handleSinkWriterRegisterEvent((SinkWriterRegisterEvent) event); + } else { + throw new FlinkRuntimeException("Unrecognized Operator Event: " + event); + } + }, + "Handling event - %s (from subTask %d)", + event, + subTaskId); + } + + // -------------------------- + // Gateway registration stuff + // -------------------------- + + @Override + public final void subtaskReset(int subTaskId, long checkpointId) { + Throwable rootCause = failedReasons.get(subTaskId); + LOG.error("Subtask {} reset at checkpoint {}.", subTaskId, checkpointId, rootCause); + subtaskGatewayMap.remove(subTaskId); + } + + @Override + public final void executionAttemptFailed( + int subTaskId, int attemptNumber, @Nullable Throwable reason) { + failedReasons.put(subTaskId, reason); + } + + @Override + public final void executionAttemptReady( + int subTaskId, int attemptNumber, SubtaskGateway gateway) { + subtaskGatewayMap.put(subTaskId, gateway); + } + + // --------------------------- + // Checkpointing related stuff + // --------------------------- + @Override + public final void checkpointCoordinator( + long checkpointId, CompletableFuture completableFuture) throws Exception { + LOG.info("Going to start checkpoint No.{}", checkpointId); + runInEventLoop(() -> snapshot(completableFuture), "Taking checkpoint - %d", checkpointId); + } + + @Override + public final void notifyCheckpointComplete(long checkpointId) { + LOG.info("Successfully completed checkpoint No.{}", checkpointId); + } + + @Override + public final void resetToCheckpoint(long checkpointId, @Nullable byte[] checkpointData) + throws Exception { + LOG.info("Going to restore from checkpoint No.{}", checkpointId); + if (checkpointData == null) { + return; + } + restore(checkpointData); + } + + // --------------------------- + // Utility functions + // --------------------------- + /** + * Run a time-consuming task in given {@link ExecutorService}. All overridable functions have + * been wrapped inside already, so there's no need to call this method again. However, if you're + * overriding methods from {@link OperatorCoordinator} or {@link CoordinationRequestHandler} + * directly, make sure you're running heavy logics inside, or the entire job might hang! 
+ */ + protected void runInEventLoop( + final ThrowingRunnable action, + final String actionName, + final Object... actionNameFormatParameters) { + coordinatorExecutor.execute( + () -> { + try { + action.run(); + } catch (Throwable t) { + // if we have a JVM critical error, promote it immediately, there is a good + // chance the logging or job failing will not succeed anymore + ExceptionUtils.rethrowIfFatalErrorOrOOM(t); + handleUnrecoverableError( + String.format(actionName, actionNameFormatParameters), t); + context.failJob(t); + } + }); + } + + /** + * Keeps checking if {@code conditionChecker} is satisfied. If not, emit a message and retry. + */ + protected void loopUntil( + BooleanSupplier conditionChecker, Runnable message, Duration timeout, Duration interval) + throws TimeoutException { + loopWhen(() -> !conditionChecker.getAsBoolean(), message, timeout, interval); + } + + /** + * Keeps checking if {@code conditionChecker} is satisfied. Otherwise, emit a message and retry. + */ + protected void loopWhen( + BooleanSupplier conditionChecker, Runnable message, Duration timeout, Duration interval) + throws TimeoutException { + long deadline = System.currentTimeMillis() + timeout.toMillis(); + long intervalMs = interval.toMillis(); + while (conditionChecker.getAsBoolean()) { + message.run(); + if (System.currentTimeMillis() > deadline) { + throw new TimeoutException("Loop checking time limit has exceeded."); + } + try { + Thread.sleep(intervalMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + } + + protected void failJob(String taskDescription, T t) { + ExceptionUtils.rethrowIfFatalErrorOrOOM(t); + LOG.error("An exception was triggered from {}. Job will fail now.", taskDescription, t); + handleUnrecoverableError(taskDescription, t); + context.failJob(t); + } + + // ------------------------ + // Visible just for testing + // ------------------------ + + @VisibleForTesting + public void emplaceOriginalSchema(TableId tableId, Schema schema) { + schemaManager.registerNewOriginalSchema(tableId, schema); + } + + @VisibleForTesting + public void emplaceEvolvedSchema(TableId tableId, Schema schema) { + schemaManager.registerNewEvolvedSchema(tableId, schema); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouter.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouter.java new file mode 100755 index 00000000000..5bd4ec8d76f --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouter.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
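loopUntil and loopWhen above poll a condition until a deadline, sleeping for the given interval between checks and re-emitting a progress message each round. A hedged sketch of how a concrete registry might use them to wait for all registered sink writers to flush (inside a subclass method that declares TimeoutException; the message and one-second interval are illustrative):

    // Runs on the coordinator executor thread; gives up after the configured rpcTimeout.
    loopUntil(
            () -> flushedSinkWriters.containsAll(activeSinkWriters),
            () -> System.out.println("Waiting for remaining sink writers to flush..."),
            rpcTimeout,
            Duration.ofSeconds(1));
    // From here on, every writer in activeSinkWriters has reported a FlushSuccessEvent.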
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.api.java.tuple.Tuple3; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Selectors; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; + +/** + * Calculates how upstream data change events should be dispatched to downstream tables. Returns one + * or many destination Table IDs based on provided routing rules. + */ +public class TableIdRouter { + + private final List> routes; + + public TableIdRouter(List routingRules) { + this.routes = new ArrayList<>(); + for (RouteRule rule : routingRules) { + try { + String tableInclusions = rule.sourceTable; + Selectors selectors = + new Selectors.SelectorsBuilder().includeTables(tableInclusions).build(); + routes.add(new Tuple3<>(selectors, rule.sinkTable, rule.replaceSymbol)); + } catch (PatternSyntaxException e) { + throw new IllegalArgumentException( + String.format( + "Failed to parse regular expression in routing rule %s. Notice that `.` is used to separate Table ID components. To use it as a regex token, put a `\\` before to escape it.", + rule), + e); + } + } + } + + public List route(TableId sourceTableId) { + List routedTableIds = + routes.stream() + .filter(route -> route.f0.isMatch(sourceTableId)) + .map(route -> resolveReplacement(sourceTableId, route)) + .collect(Collectors.toList()); + if (routedTableIds.isEmpty()) { + routedTableIds.add(sourceTableId); + } + return routedTableIds; + } + + @VisibleForTesting + List> getRoutes() { + return routes; + } + + private TableId resolveReplacement( + TableId originalTable, Tuple3 route) { + if (route.f2 != null) { + return TableId.parse(route.f1.replace(route.f2, originalTable.getTableName())); + } + return TableId.parse(route.f1); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/CoordinationResponseUtils.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/CoordinationResponseUtils.java similarity index 98% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/CoordinationResponseUtils.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/CoordinationResponseUtils.java index 4f13e928e1e..0024102127e 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/CoordinationResponseUtils.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/CoordinationResponseUtils.java @@ -15,7 +15,7 @@ * limitations under the License. 
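TableIdRouter above resolves each upstream table id against every matching rule and falls back to the source id when nothing matches; a replace symbol in the sink name is substituted with the original table name. A usage sketch, assuming RouteRule exposes a (sourceTable, sinkTable, replaceSymbol) constructor that fills the three fields read above:

    List<RouteRule> rules =
            Arrays.asList(
                    // fan-in: every table under mydb is merged into one sink table
                    new RouteRule("mydb.\\.*", "sink_db.merged", null),
                    // replace symbol: keep the original table name in the sink namespace
                    new RouteRule("mydb.\\.*", "backup_db.<>", "<>"));
    TableIdRouter router = new TableIdRouter(rules);

    // Both rules match, so the event fans out to [sink_db.merged, backup_db.orders].
    List<TableId> destinations = router.route(TableId.parse("mydb.orders"));

    // No rule matches, so the source id passes through unchanged: [other_db.t].
    List<TableId> fallback = router.route(TableId.parse("other_db.t"));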
*/ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; import org.apache.flink.api.common.typeutils.TypeSerializer; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/FlushSuccessEvent.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/FlushSuccessEvent.java similarity index 63% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/FlushSuccessEvent.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/FlushSuccessEvent.java index 6959e883b85..c64858a2b65 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/FlushSuccessEvent.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/FlushSuccessEvent.java @@ -15,12 +15,14 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.OperatorEvent; +import javax.annotation.Nullable; + import java.util.Objects; /** @@ -42,7 +44,23 @@ public class FlushSuccessEvent implements OperatorEvent { */ private final long nonce; - public FlushSuccessEvent(int subtask, TableId tableId, long nonce) { + /** + * A {@link FlushSuccessEvent} Corresponding to a {@code FlushEvent} that does not specifically + * flush one single table. + */ + public static FlushSuccessEvent ofAll(int subtask) { + return new FlushSuccessEvent(subtask, null, -1); + } + + /** + * A {@link FlushSuccessEvent} Corresponding to a regular {@code FlushEvent} that flushes a + * table. 
+ */ + public static FlushSuccessEvent of(int subtask, TableId tableId, long nonce) { + return new FlushSuccessEvent(subtask, tableId, nonce); + } + + protected FlushSuccessEvent(int subtask, TableId tableId, long nonce) { this.subtask = subtask; this.tableId = tableId; this.nonce = nonce; @@ -52,7 +70,7 @@ public int getSubtask() { return subtask; } - public TableId getTableId() { + public @Nullable TableId getTableId() { return tableId; } @@ -78,4 +96,20 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(subtask, tableId, nonce); } + + @Override + public String toString() { + if (tableId == null) { + return "FlushSuccessEvent{subtask= " + subtask + ", << not table-specific >> }"; + } else { + return "FlushSuccessEvent{" + + "subtask=" + + subtask + + ", tableId=" + + tableId + + ", nonce=" + + nonce + + '}'; + } + } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaRequest.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaRequest.java similarity index 92% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaRequest.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaRequest.java index f5b7a58607a..69ab9a314a3 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaRequest.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaRequest.java @@ -15,11 +15,11 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.CoordinationRequest; /** Request to {@link SchemaRegistry} for getting schema of a table. */ diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaResponse.java similarity index 90% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaResponse.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaResponse.java index 81fef92d7e9..f2f7e6764f5 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetEvolvedSchemaResponse.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetEvolvedSchemaResponse.java @@ -15,11 +15,11 @@ * limitations under the License. 
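The two factory methods added to FlushSuccessEvent above separate table-scoped acknowledgements from the broadcast case where a writer has flushed everything. A short usage sketch (subtaskIndex and nonce are assumed local variables; the table id is illustrative):

    // Acknowledge a flush for one table, tied to the schema change's nonce.
    FlushSuccessEvent perTable =
            FlushSuccessEvent.of(subtaskIndex, TableId.parse("mydb.orders"), nonce);

    // Acknowledge a "flush everything" request; no table id, the nonce is irrelevant.
    FlushSuccessEvent flushAll = FlushSuccessEvent.ofAll(subtaskIndex);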
*/ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.CoordinationResponse; import javax.annotation.Nullable; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaRequest.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaRequest.java similarity index 92% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaRequest.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaRequest.java index 36544da7746..87c7ce15a4f 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaRequest.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaRequest.java @@ -15,11 +15,11 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.CoordinationRequest; /** Request to {@link SchemaRegistry} for getting original schema of a table. */ diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaResponse.java similarity index 90% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaResponse.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaResponse.java index 2c5343349c0..031e88cfbc4 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/GetOriginalSchemaResponse.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/GetOriginalSchemaResponse.java @@ -15,11 +15,11 @@ * limitations under the License. 
*/ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.CoordinationResponse; import javax.annotation.Nullable; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SinkWriterRegisterEvent.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/SinkWriterRegisterEvent.java similarity index 91% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SinkWriterRegisterEvent.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/SinkWriterRegisterEvent.java index 190dbafc266..ed05d7d2ec9 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SinkWriterRegisterEvent.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/common/SinkWriterRegisterEvent.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.common; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import org.apache.flink.runtime.operators.coordination.OperatorEvent; import java.util.Objects; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeRequest.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeRequest.java new file mode 100644 index 00000000000..468cccf9948 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeRequest.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common.event.distributed; + +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.utils.Preconditions; +import org.apache.flink.cdc.runtime.operators.schema.distributed.SchemaCoordinator; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; + +/** Mapper's request to {@link SchemaCoordinator} for reducing an incompatible schema. 
*/ +public class SchemaChangeRequest implements CoordinationRequest { + // Indicating which source subTask does this schema change event comes from. + private final int sourceSubTaskId; + + // Indicating which schema mapper initiates this schema change request. + private final int sinkSubTaskId; + + // A schema change event is uniquely bound to a sourceSubTaskId. + private final SchemaChangeEvent schemaChangeEvent; + + public static SchemaChangeRequest createNoOpRequest(int sinkSubTaskId) { + return new SchemaChangeRequest(-1, sinkSubTaskId, null); + } + + public SchemaChangeRequest( + int sourceSubTaskId, int sinkSubTaskId, SchemaChangeEvent schemaChangeEvent) { + this.sourceSubTaskId = sourceSubTaskId; + this.sinkSubTaskId = sinkSubTaskId; + this.schemaChangeEvent = schemaChangeEvent; + } + + // Checking if this schema request was invalidated since it has been submitted by another + // downstream sink subTask before. + public boolean isNoOpRequest() { + return sourceSubTaskId == -1 || schemaChangeEvent == null; + } + + public int getSourceSubTaskId() { + Preconditions.checkState( + !isNoOpRequest(), "Unable to fetch source subTaskId for an align event."); + return sourceSubTaskId; + } + + public int getSinkSubTaskId() { + return sinkSubTaskId; + } + + public SchemaChangeEvent getSchemaChangeEvent() { + Preconditions.checkState( + !isNoOpRequest(), "Unable to fetch source subTaskId for an align event."); + return schemaChangeEvent; + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeResponse.java new file mode 100644 index 00000000000..07410650566 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/distributed/SchemaChangeResponse.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common.event.distributed; + +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.runtime.operators.schema.distributed.SchemaCoordinator; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; + +import java.util.List; +import java.util.Objects; + +/** Mapper's request to {@link SchemaCoordinator} for reducing an incompatible schema. 
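A usage sketch of the no-op handshake defined above (schemaChangeEvent is an assumed local; the subtask ids are illustrative):

    // A schema mapper on sink subtask 2 forwarding an incompatible change from source subtask 0.
    SchemaChangeRequest request = new SchemaChangeRequest(0, 2, schemaChangeEvent);

    // The same mapper joining an alignment round without a schema change of its own.
    SchemaChangeRequest noOp = SchemaChangeRequest.createNoOpRequest(2);
    // noOp.isNoOpRequest() == true: sourceSubTaskId is -1 and the event is null.
    // noOp.getSinkSubTaskId() == 2: still identifies the requesting mapper.
    // noOp.getSourceSubTaskId() would throw, since a no-op request has no originating source.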
*/ +public class SchemaChangeResponse implements CoordinationResponse { + + private final List reducedSchemaResult; + private final int reduceSeqNum; + + public SchemaChangeResponse(List reducedSchemaResult, int reduceSeqNum) { + this.reducedSchemaResult = reducedSchemaResult; + this.reduceSeqNum = reduceSeqNum; + } + + public List getReducedSchemaResult() { + return reducedSchemaResult; + } + + public int getReduceSeqNum() { + return reduceSeqNum; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof SchemaChangeResponse)) { + return false; + } + SchemaChangeResponse that = (SchemaChangeResponse) o; + return Objects.equals(reducedSchemaResult, that.reducedSchemaResult) + && reduceSeqNum == that.reduceSeqNum; + } + + @Override + public int hashCode() { + return Objects.hash(reducedSchemaResult, reduceSeqNum); + } + + @Override + public String toString() { + return "ReduceSchemaResponse{" + + "reducedSchemaResult=" + + reducedSchemaResult + + ", reduceSeqNum=" + + reduceSeqNum + + '}'; + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeRequest.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeRequest.java similarity index 79% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeRequest.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeRequest.java index fba1e505577..cac079c518e 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeRequest.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeRequest.java @@ -15,18 +15,19 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.regular; import org.apache.flink.cdc.common.event.SchemaChangeEvent; import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaCoordinator; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; import org.apache.flink.runtime.operators.coordination.CoordinationRequest; import java.util.Objects; /** - * The request from {@link SchemaOperator} to {@link SchemaRegistry} to request to change schemas. + * The request from {@link SchemaOperator} to {@link SchemaCoordinator} to request to change + * schemas. */ public class SchemaChangeRequest implements CoordinationRequest { @@ -34,10 +35,13 @@ public class SchemaChangeRequest implements CoordinationRequest { /** The sender of the request. */ private final TableId tableId; + /** The schema changes. */ private final SchemaChangeEvent schemaChangeEvent; + /** The ID of subTask that initiated the request. */ private final int subTaskId; + /** * Nonce code to distinguish flush events corresponding to each schema change event from * different subTasks. 
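To make the align-request contract of the new distributed SchemaChangeRequest and SchemaChangeResponse above concrete, here is a minimal, illustrative sketch; the class name AlignRequestSketch and the subTask id are invented for the example, and it only assumes the constructor, factory, and accessors shown in the diff.

import org.apache.flink.cdc.runtime.operators.schema.common.event.distributed.SchemaChangeRequest;

public class AlignRequestSketch {
    public static void main(String[] args) {
        // A no-op ("align") request carries no schema change event and no source subTask id.
        SchemaChangeRequest align = SchemaChangeRequest.createNoOpRequest(3);
        System.out.println(align.isNoOpRequest());    // true
        System.out.println(align.getSinkSubTaskId()); // 3
        // Calling align.getSourceSubTaskId() or align.getSchemaChangeEvent() here would fail the
        // Preconditions check, since only non-align requests carry that information.
    }
}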
@@ -87,4 +91,18 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(tableId, schemaChangeEvent, subTaskId, nonce); } + + @Override + public String toString() { + return "SchemaChangeRequest{" + + "tableId=" + + tableId + + ", schemaChangeEvent=" + + schemaChangeEvent + + ", subTaskId=" + + subTaskId + + ", nonce=" + + nonce + + '}'; + } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeResponse.java similarity index 62% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResponse.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeResponse.java index bea0ce65ce1..8ed31373cd1 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResponse.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/event/regular/SchemaChangeResponse.java @@ -15,60 +15,73 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.event; +package org.apache.flink.cdc.runtime.operators.schema.common.event.regular; import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaCoordinator; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; import org.apache.flink.runtime.operators.coordination.CoordinationResponse; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Objects; /** - * The response for {@link SchemaChangeRequest} from {@link SchemaRegistry} to {@link + * The response for {@link SchemaChangeRequest} from {@link SchemaCoordinator} to {@link * SchemaOperator}. */ public class SchemaChangeResponse implements CoordinationResponse { private static final long serialVersionUID = 1L; /** - * Whether the SchemaOperator need to buffer data and the SchemaOperatorCoordinator need to wait - * for flushing. + * Actually finished schema change events. This will only be effective if status is {@code + * accepted}. 
*/ - private final List schemaChangeEvents; + private final List appliedSchemaChangeEvents; + + private final Map evolvedSchemas; private final ResponseCode responseCode; - public static SchemaChangeResponse accepted(List schemaChangeEvents) { - return new SchemaChangeResponse(schemaChangeEvents, ResponseCode.ACCEPTED); + public static SchemaChangeResponse success( + List schemaChangeEvents, Map evolvedSchemas) { + return new SchemaChangeResponse(ResponseCode.SUCCESS, schemaChangeEvents, evolvedSchemas); } public static SchemaChangeResponse busy() { - return new SchemaChangeResponse(Collections.emptyList(), ResponseCode.BUSY); + return new SchemaChangeResponse(ResponseCode.BUSY); } public static SchemaChangeResponse duplicate() { - return new SchemaChangeResponse(Collections.emptyList(), ResponseCode.DUPLICATE); + return new SchemaChangeResponse(ResponseCode.DUPLICATE); } public static SchemaChangeResponse ignored() { - return new SchemaChangeResponse(Collections.emptyList(), ResponseCode.IGNORED); + return new SchemaChangeResponse(ResponseCode.IGNORED); } public static SchemaChangeResponse waitingForFlush() { - return new SchemaChangeResponse(Collections.emptyList(), ResponseCode.WAITING_FOR_FLUSH); + return new SchemaChangeResponse(ResponseCode.WAITING_FOR_FLUSH); + } + + private SchemaChangeResponse(ResponseCode responseCode) { + this(responseCode, Collections.emptyList(), Collections.emptyMap()); } private SchemaChangeResponse( - List schemaChangeEvents, ResponseCode responseCode) { - this.schemaChangeEvents = schemaChangeEvents; + ResponseCode responseCode, + List appliedSchemaChangeEvents, + Map evolvedSchemas) { this.responseCode = responseCode; + this.appliedSchemaChangeEvents = appliedSchemaChangeEvents; + this.evolvedSchemas = evolvedSchemas; } - public boolean isAccepted() { - return ResponseCode.ACCEPTED.equals(responseCode); + public boolean isSuccess() { + return ResponseCode.SUCCESS.equals(responseCode); } public boolean isRegistryBusy() { @@ -87,8 +100,12 @@ public boolean isWaitingForFlush() { return ResponseCode.WAITING_FOR_FLUSH.equals(responseCode); } - public List getSchemaChangeEvents() { - return schemaChangeEvents; + public List getAppliedSchemaChangeEvents() { + return appliedSchemaChangeEvents; + } + + public Map getEvolvedSchemas() { + return evolvedSchemas; } @Override @@ -100,20 +117,20 @@ public boolean equals(Object o) { return false; } SchemaChangeResponse response = (SchemaChangeResponse) o; - return Objects.equals(schemaChangeEvents, response.schemaChangeEvents) + return Objects.equals(appliedSchemaChangeEvents, response.appliedSchemaChangeEvents) && responseCode == response.responseCode; } @Override public int hashCode() { - return Objects.hash(schemaChangeEvents, responseCode); + return Objects.hash(appliedSchemaChangeEvents, responseCode); } @Override public String toString() { return "SchemaChangeResponse{" + "schemaChangeEvents=" - + schemaChangeEvents + + appliedSchemaChangeEvents + ", responseCode=" + responseCode + '}'; @@ -134,7 +151,7 @@ public String toString() { * required. Possibly caused by LENIENT mode or merging table strategies. 
*/ public enum ResponseCode { - ACCEPTED, + SUCCESS, BUSY, DUPLICATE, IGNORED, diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/metrics/SchemaOperatorMetrics.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/metrics/SchemaOperatorMetrics.java similarity index 96% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/metrics/SchemaOperatorMetrics.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/metrics/SchemaOperatorMetrics.java index 4ba8e30c719..d40ffdafe6f 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/metrics/SchemaOperatorMetrics.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/common/metrics/SchemaOperatorMetrics.java @@ -15,10 +15,10 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.metrics; +package org.apache.flink.cdc.runtime.operators.schema.common.metrics; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; import org.apache.flink.metrics.Counter; import org.apache.flink.metrics.MetricGroup; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivation.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivation.java deleted file mode 100644 index 39ebc50ba34..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivation.java +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
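A short sketch of the reworked regular SchemaChangeResponse surface, where success() now carries both the applied events and the evolved schemas while the other factories stay payload-free; the class ResponseCodeSketch is invented for illustration and relies only on the factories and accessors shown above.

import org.apache.flink.cdc.runtime.operators.schema.common.event.regular.SchemaChangeResponse;

import java.util.Collections;

public class ResponseCodeSketch {
    public static void main(String[] args) {
        // Non-success responses carry no payload at all.
        SchemaChangeResponse busy = SchemaChangeResponse.busy();
        System.out.println(busy.isSuccess());                    // false
        System.out.println(busy.isRegistryBusy());               // true
        System.out.println(busy.getAppliedSchemaChangeEvents()); // []

        // A successful response reports what was actually applied plus the evolved schemas.
        SchemaChangeResponse ok =
                SchemaChangeResponse.success(Collections.emptyList(), Collections.emptyMap());
        System.out.println(ok.isSuccess());         // true
        System.out.println(ok.getEvolvedSchemas()); // {}
    }
}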
- */ - -package org.apache.flink.cdc.runtime.operators.schema.coordinator; - -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.cdc.common.event.AddColumnEvent; -import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; -import org.apache.flink.cdc.common.event.CreateTableEvent; -import org.apache.flink.cdc.common.event.RenameColumnEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.event.visitor.SchemaChangeEventVisitor; -import org.apache.flink.cdc.common.route.RouteRule; -import org.apache.flink.cdc.common.schema.Column; -import org.apache.flink.cdc.common.schema.PhysicalColumn; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.schema.Selectors; -import org.apache.flink.cdc.common.types.DataType; -import org.apache.flink.cdc.common.utils.ChangeEventUtils; -import org.apache.flink.cdc.common.utils.SchemaUtils; -import org.apache.flink.cdc.runtime.serializer.TableIdSerializer; -import org.apache.flink.core.memory.DataInputViewStreamWrapper; -import org.apache.flink.core.memory.DataOutputViewStreamWrapper; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; - -/** Derive schema changes based on the routing rules. */ -public class SchemaDerivation { - private final SchemaManager schemaManager; - private final Map> derivationMapping; - - /** - * Storing route source table selector, sink table name (before symbol replacement), and replace - * symbol in a tuple. 
- */ - private transient List> routes; - - public SchemaDerivation( - SchemaManager schemaManager, - List routeRules, - Map> derivationMapping) { - this.schemaManager = schemaManager; - this.routes = - routeRules.stream() - .map( - rule -> { - String tableInclusions = rule.sourceTable; - Selectors selectors = - new Selectors.SelectorsBuilder() - .includeTables(tableInclusions) - .build(); - return new Tuple3<>( - selectors, rule.sinkTable, rule.replaceSymbol); - }) - .collect(Collectors.toList()); - this.derivationMapping = derivationMapping; - } - - public List applySchemaChange(SchemaChangeEvent schemaChangeEvent) { - List events = new ArrayList<>(); - TableId originalTable = schemaChangeEvent.tableId(); - boolean noRouteMatched = true; - - for (Tuple3 route : routes) { - // Check routing table - if (!route.f0.isMatch(originalTable)) { - continue; - } - - noRouteMatched = false; - - // Matched a routing rule - TableId derivedTable = resolveReplacement(originalTable, route); - Set originalTables = - derivationMapping.computeIfAbsent(derivedTable, t -> new HashSet<>()); - originalTables.add(originalTable); - - if (originalTables.size() == 1) { - // single source mapping, replace the table ID directly - SchemaChangeEvent derivedSchemaChangeEvent = - ChangeEventUtils.recreateSchemaChangeEvent(schemaChangeEvent, derivedTable); - events.add(derivedSchemaChangeEvent); - } else { - // multiple source mapping (merging tables) - Schema derivedTableSchema = - schemaManager.getLatestEvolvedSchema(derivedTable).get(); - events.addAll( - Objects.requireNonNull( - SchemaChangeEventVisitor.visit( - schemaChangeEvent, - addColumnEvent -> - handleAddColumnEvent( - addColumnEvent, - derivedTableSchema, - derivedTable), - alterColumnTypeEvent -> - handleAlterColumnTypeEvent( - alterColumnTypeEvent, - derivedTableSchema, - derivedTable), - createTableEvent -> - handleCreateTableEvent( - createTableEvent, - derivedTableSchema, - derivedTable), - dropColumnEvent -> - Collections.emptyList(), // Column drop shouldn't be - // spread to route - // destination. - dropTableEvent -> - Collections.emptyList(), // Table drop shouldn't be - // spread to route - // destination. - renameColumnEvent -> - handleRenameColumnEvent( - renameColumnEvent, - derivedTableSchema, - derivedTable), - truncateTableEvent -> - Collections.emptyList() // // Table truncation - // shouldn't be spread to route - // destination. 
- ))); - } - } - - if (noRouteMatched) { - // No routes are matched, leave it as-is - return Collections.singletonList(schemaChangeEvent); - } else { - return events; - } - } - - private TableId resolveReplacement( - TableId originalTable, Tuple3 route) { - if (route.f2 != null) { - return TableId.parse(route.f1.replace(route.f2, originalTable.getTableName())); - } - return TableId.parse(route.f1); - } - - public Map> getDerivationMapping() { - return derivationMapping; - } - - public static void serializeDerivationMapping( - SchemaDerivation schemaDerivation, DataOutputStream out) throws IOException { - TableIdSerializer tableIdSerializer = TableIdSerializer.INSTANCE; - // Serialize derivation mapping in SchemaDerivation - Map> derivationMapping = schemaDerivation.getDerivationMapping(); - out.writeInt(derivationMapping.size()); - for (Map.Entry> entry : derivationMapping.entrySet()) { - // Routed table ID - TableId routedTableId = entry.getKey(); - tableIdSerializer.serialize(routedTableId, new DataOutputViewStreamWrapper(out)); - // Original table IDs - Set originalTableIds = entry.getValue(); - out.writeInt(originalTableIds.size()); - for (TableId originalTableId : originalTableIds) { - tableIdSerializer.serialize(originalTableId, new DataOutputViewStreamWrapper(out)); - } - } - } - - public static Map> deserializerDerivationMapping(DataInputStream in) - throws IOException { - TableIdSerializer tableIdSerializer = TableIdSerializer.INSTANCE; - int derivationMappingSize = in.readInt(); - Map> derivationMapping = new HashMap<>(derivationMappingSize); - for (int i = 0; i < derivationMappingSize; i++) { - // Routed table ID - TableId routedTableId = - tableIdSerializer.deserialize(new DataInputViewStreamWrapper(in)); - // Original table IDs - int numOriginalTables = in.readInt(); - Set originalTableIds = new HashSet<>(numOriginalTables); - for (int j = 0; j < numOriginalTables; j++) { - TableId originalTableId = - tableIdSerializer.deserialize(new DataInputViewStreamWrapper(in)); - originalTableIds.add(originalTableId); - } - derivationMapping.put(routedTableId, originalTableIds); - } - return derivationMapping; - } - - private List handleRenameColumnEvent( - RenameColumnEvent renameColumnEvent, Schema derivedTableSchema, TableId derivedTable) { - List newColumns = new ArrayList<>(); - renameColumnEvent - .getNameMapping() - .forEach( - (before, after) -> { - if (derivedTableSchema.getColumn(after).isPresent()) { - return; - } - Column existedColumn = derivedTableSchema.getColumn(before).get(); - newColumns.add( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn( - after, - existedColumn.getType(), - existedColumn.getComment()))); - }); - List schemaChangeEvents = new ArrayList<>(); - if (!newColumns.isEmpty()) { - AddColumnEvent derivedSchemaChangeEvent = new AddColumnEvent(derivedTable, newColumns); - schemaChangeEvents.add(derivedSchemaChangeEvent); - } - return schemaChangeEvents; - } - - private List handleAlterColumnTypeEvent( - AlterColumnTypeEvent alterColumnTypeEvent, - Schema derivedTableSchema, - TableId derivedTable) { - Map typeDifference = new HashMap<>(); - alterColumnTypeEvent - .getTypeMapping() - .forEach( - (columnName, dataType) -> { - Column existedColumnInDerivedTable = - derivedTableSchema.getColumn(columnName).get(); - if (!existedColumnInDerivedTable.getType().equals(dataType)) { - // Check type compatibility - DataType widerType = - getWiderType( - columnName, - existedColumnInDerivedTable.getType(), - dataType); - if 
(!widerType.equals(existedColumnInDerivedTable.getType())) { - typeDifference.put( - existedColumnInDerivedTable.getName(), widerType); - } - } - }); - List schemaChangeEvents = new ArrayList<>(); - if (!typeDifference.isEmpty()) { - AlterColumnTypeEvent derivedSchemaChangeEvent = - new AlterColumnTypeEvent(derivedTable, typeDifference); - schemaChangeEvents.add(derivedSchemaChangeEvent); - } - return schemaChangeEvents; - } - - private List handleAddColumnEvent( - AddColumnEvent addColumnEvent, Schema derivedTableSchema, TableId derivedTable) { - List newColumns = new ArrayList<>(); - Map newTypeMapping = new HashMap<>(); - // Check if new column already existed in the derived table - for (AddColumnEvent.ColumnWithPosition addedColumn : addColumnEvent.getAddedColumns()) { - Optional optionalColumnInDerivedTable = - derivedTableSchema.getColumn(addedColumn.getAddColumn().getName()); - if (!optionalColumnInDerivedTable.isPresent()) { - // Non-existed column. Use AddColumn - newColumns.add(new AddColumnEvent.ColumnWithPosition(addedColumn.getAddColumn())); - } else { - // Existed column. Check type compatibility - Column existedColumnInDerivedTable = optionalColumnInDerivedTable.get(); - if (!existedColumnInDerivedTable - .getType() - .equals(addedColumn.getAddColumn().getType())) { - DataType widerType = - getWiderType( - existedColumnInDerivedTable.getName(), - existedColumnInDerivedTable.getType(), - addedColumn.getAddColumn().getType()); - if (!widerType.equals(existedColumnInDerivedTable.getType())) { - newTypeMapping.put(existedColumnInDerivedTable.getName(), widerType); - } - } - } - } - - List schemaChangeEvents = new ArrayList<>(); - if (!newColumns.isEmpty()) { - schemaChangeEvents.add(new AddColumnEvent(derivedTable, newColumns)); - } - if (!newTypeMapping.isEmpty()) { - schemaChangeEvents.add(new AlterColumnTypeEvent(derivedTable, newTypeMapping)); - } - return schemaChangeEvents; - } - - private List handleCreateTableEvent( - CreateTableEvent createTableEvent, Schema derivedTableSchema, TableId derivedTable) { - List newColumns = new ArrayList<>(); - Map newTypeMapping = new HashMap<>(); - // Check if there is any columns that doesn't exist in the derived table - // and perform add-column for non-existed columns. - for (Column column : createTableEvent.getSchema().getColumns()) { - Optional optionalColumnInDerivedTable = - derivedTableSchema.getColumn(column.getName()); - if (!optionalColumnInDerivedTable.isPresent()) { - // Non-existed column. Use AddColumn - newColumns.add(new AddColumnEvent.ColumnWithPosition(column)); - } else { - // Existed column. 
Check type compatibility - Column existedColumnInDerivedTable = optionalColumnInDerivedTable.get(); - if (!existedColumnInDerivedTable.getType().equals(column.getType())) { - DataType widerType = - getWiderType( - existedColumnInDerivedTable.getName(), - existedColumnInDerivedTable.getType(), - column.getType()); - if (!widerType.equals(existedColumnInDerivedTable.getType())) { - newTypeMapping.put(existedColumnInDerivedTable.getName(), widerType); - } - } - } - } - - List schemaChangeEvents = new ArrayList<>(); - if (!newColumns.isEmpty()) { - schemaChangeEvents.add(new AddColumnEvent(derivedTable, newColumns)); - } - if (!newTypeMapping.isEmpty()) { - schemaChangeEvents.add(new AlterColumnTypeEvent(derivedTable, newTypeMapping)); - } - return schemaChangeEvents; - } - - private DataType getWiderType(String columnName, DataType thisType, DataType thatType) { - try { - return SchemaUtils.inferWiderType(thisType, thatType); - } catch (IllegalStateException e) { - throw new IllegalStateException( - String.format( - "Incompatible types found for column `%s`: \"%s\" and \"%s\"", - columnName, thisType, thatType)); - } - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistry.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistry.java deleted file mode 100644 index 9c13b9ed5fe..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistry.java +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.runtime.operators.schema.coordinator; - -import org.apache.flink.cdc.common.annotation.VisibleForTesting; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; -import org.apache.flink.cdc.common.route.RouteRule; -import org.apache.flink.cdc.common.sink.MetadataApplier; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.event.FlushSuccessEvent; -import org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResultRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SinkWriterRegisterEvent; -import org.apache.flink.runtime.operators.coordination.CoordinationRequest; -import org.apache.flink.runtime.operators.coordination.CoordinationRequestHandler; -import org.apache.flink.runtime.operators.coordination.CoordinationResponse; -import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; -import org.apache.flink.runtime.operators.coordination.OperatorEvent; -import org.apache.flink.util.ExceptionUtils; -import org.apache.flink.util.FlinkException; -import org.apache.flink.util.function.ThrowingRunnable; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nullable; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; - -import static org.apache.flink.cdc.runtime.operators.schema.event.CoordinationResponseUtils.wrap; - -/** - * The implementation of the {@link OperatorCoordinator} for the {@link SchemaOperator}. - * - *

The SchemaRegister provides an event loop style thread model to interact with the - * Flink runtime. The coordinator ensures that all the state manipulations are made by its event - * loop thread. - * - *

This SchemaRegister is responsible for: - * - *

  • Apply schema changes when receiving the {@link SchemaChangeRequest} from {@link - * SchemaOperator} - *
  • Notify {@link SchemaOperator} to continue to push data for the table after receiving {@link - * FlushSuccessEvent} from its registered sink writer - *
- */ -public class SchemaRegistry implements OperatorCoordinator, CoordinationRequestHandler { - - private static final Logger LOG = LoggerFactory.getLogger(SchemaRegistry.class); - - /** The context of the coordinator. */ - private final OperatorCoordinator.Context context; - /** The name of the operator this SchemaOperatorCoordinator is associated with. */ - private final String operatorName; - - /** A single-thread executor to handle async execution of the coordinator. */ - private final ExecutorService coordinatorExecutor; - - /** - * Tracks the subtask failed reason to throw a more meaningful exception in {@link - * #subtaskReset}. - */ - private final Map failedReasons; - - /** Metadata applier for applying schema changes to external system. */ - private final MetadataApplier metadataApplier; - - private final List routes; - - /** The request handler that handle all requests and events. */ - private SchemaRegistryRequestHandler requestHandler; - - /** Schema manager for tracking schemas of all tables. */ - private SchemaManager schemaManager; - - private SchemaDerivation schemaDerivation; - - private SchemaChangeBehavior schemaChangeBehavior; - - /** - * Current parallelism. Use this to verify if Schema Registry has collected enough flush success - * events from sink operators. - */ - private int currentParallelism; - - public SchemaRegistry( - String operatorName, - OperatorCoordinator.Context context, - ExecutorService executorService, - MetadataApplier metadataApplier, - List routes) { - this( - operatorName, - context, - executorService, - metadataApplier, - routes, - SchemaChangeBehavior.LENIENT); - } - - public SchemaRegistry( - String operatorName, - OperatorCoordinator.Context context, - ExecutorService coordinatorExecutor, - MetadataApplier metadataApplier, - List routes, - SchemaChangeBehavior schemaChangeBehavior) { - this.context = context; - this.coordinatorExecutor = coordinatorExecutor; - this.operatorName = operatorName; - this.failedReasons = new HashMap<>(); - this.metadataApplier = metadataApplier; - this.routes = routes; - this.schemaManager = new SchemaManager(schemaChangeBehavior); - this.schemaDerivation = new SchemaDerivation(schemaManager, routes, new HashMap<>()); - this.requestHandler = - new SchemaRegistryRequestHandler( - metadataApplier, - schemaManager, - schemaDerivation, - schemaChangeBehavior, - context); - this.schemaChangeBehavior = schemaChangeBehavior; - } - - @Override - public void start() throws Exception { - LOG.info("Starting SchemaRegistry for {}.", operatorName); - this.failedReasons.clear(); - this.currentParallelism = context.currentParallelism(); - LOG.info( - "Started SchemaRegistry for {}. 
Parallelism: {}", operatorName, currentParallelism); - } - - @Override - public void close() throws Exception { - LOG.info("SchemaRegistry for {} closed.", operatorName); - coordinatorExecutor.shutdown(); - requestHandler.close(); - } - - @Override - public void handleEventFromOperator(int subtask, int attemptNumber, OperatorEvent event) { - runInEventLoop( - () -> { - try { - if (event instanceof FlushSuccessEvent) { - FlushSuccessEvent flushSuccessEvent = (FlushSuccessEvent) event; - LOG.info( - "Sink subtask {} succeed flushing for table {} (nonce: {}).", - flushSuccessEvent.getSubtask(), - flushSuccessEvent.getTableId().toString(), - flushSuccessEvent.getNonce()); - requestHandler.flushSuccess( - flushSuccessEvent.getSubtask(), flushSuccessEvent.getNonce()); - } else if (event instanceof SinkWriterRegisterEvent) { - requestHandler.registerSinkWriter( - ((SinkWriterRegisterEvent) event).getSubtask()); - } else { - throw new FlinkException("Unrecognized Operator Event: " + event); - } - } catch (Throwable t) { - context.failJob(t); - throw t; - } - }, - "handling event %s from subTask %d", - event, - subtask); - } - - @Override - public void checkpointCoordinator(long checkpointId, CompletableFuture resultFuture) { - // we generate checkpoint in an async thread to not block the JobManager's main thread, the - // coordinator state might be large if there are many schema changes and monitor many - // tables. - runInEventLoop( - () -> { - try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(baos)) { - // Serialize SchemaManager - int schemaManagerSerializerVersion = SchemaManager.SERIALIZER.getVersion(); - out.writeInt(schemaManagerSerializerVersion); - byte[] serializedSchemaManager = - SchemaManager.SERIALIZER.serialize(schemaManager); - out.writeInt(serializedSchemaManager.length); - out.write(serializedSchemaManager); - // Serialize SchemaDerivation mapping - SchemaDerivation.serializeDerivationMapping(schemaDerivation, out); - resultFuture.complete(baos.toByteArray()); - } catch (Throwable t) { - context.failJob(t); - throw t; - } - }, - "taking checkpoint %d", - checkpointId); - } - - private void runInEventLoop( - final ThrowingRunnable action, - final String actionName, - final Object... actionNameFormatParameters) { - coordinatorExecutor.execute( - () -> { - try { - action.run(); - } catch (Throwable t) { - // if we have a JVM critical error, promote it immediately, there is a good - // chance the logging or job failing will not succeed anymore - ExceptionUtils.rethrowIfFatalErrorOrOOM(t); - - final String actionString = - String.format(actionName, actionNameFormatParameters); - LOG.error( - "Uncaught exception in the SchemaEvolutionCoordinator for {} while {}. 
Triggering job failover.", - operatorName, - actionString, - t); - context.failJob(t); - } - }); - } - - @Override - public void notifyCheckpointComplete(long checkpointId) { - // do nothing - } - - @Override - public CompletableFuture handleCoordinationRequest( - CoordinationRequest request) { - CompletableFuture responseFuture = new CompletableFuture<>(); - runInEventLoop( - () -> { - try { - if (request instanceof SchemaChangeRequest) { - SchemaChangeRequest schemaChangeRequest = (SchemaChangeRequest) request; - requestHandler.handleSchemaChangeRequest( - schemaChangeRequest, responseFuture); - } else if (request instanceof SchemaChangeResultRequest) { - requestHandler.getSchemaChangeResult( - (SchemaChangeResultRequest) request, responseFuture); - } else if (request instanceof GetEvolvedSchemaRequest) { - handleGetEvolvedSchemaRequest( - ((GetEvolvedSchemaRequest) request), responseFuture); - } else if (request instanceof GetOriginalSchemaRequest) { - handleGetOriginalSchemaRequest( - (GetOriginalSchemaRequest) request, responseFuture); - } else { - throw new IllegalArgumentException( - "Unrecognized CoordinationRequest type: " + request); - } - } catch (Throwable t) { - context.failJob(t); - throw t; - } - }, - "handling coordination request %s", - request); - return responseFuture; - } - - @Override - public void resetToCheckpoint(long checkpointId, @Nullable byte[] checkpointData) - throws Exception { - if (checkpointData == null) { - return; - } - try (ByteArrayInputStream bais = new ByteArrayInputStream(checkpointData); - DataInputStream in = new DataInputStream(bais)) { - int schemaManagerSerializerVersion = in.readInt(); - - switch (schemaManagerSerializerVersion) { - case 0: - { - int length = in.readInt(); - byte[] serializedSchemaManager = new byte[length]; - in.readFully(serializedSchemaManager); - schemaManager = - SchemaManager.SERIALIZER.deserialize( - schemaManagerSerializerVersion, serializedSchemaManager); - schemaDerivation = - new SchemaDerivation(schemaManager, routes, Collections.emptyMap()); - requestHandler = - new SchemaRegistryRequestHandler( - metadataApplier, - schemaManager, - schemaDerivation, - schemaManager.getBehavior(), - context); - break; - } - case 1: - case 2: - { - int length = in.readInt(); - byte[] serializedSchemaManager = new byte[length]; - in.readFully(serializedSchemaManager); - schemaManager = - SchemaManager.SERIALIZER.deserialize( - schemaManagerSerializerVersion, serializedSchemaManager); - Map> derivationMapping = - SchemaDerivation.deserializerDerivationMapping(in); - schemaDerivation = - new SchemaDerivation(schemaManager, routes, derivationMapping); - requestHandler = - new SchemaRegistryRequestHandler( - metadataApplier, - schemaManager, - schemaDerivation, - schemaChangeBehavior, - context); - break; - } - default: - throw new IOException( - "Unrecognized serialization version " + schemaManagerSerializerVersion); - } - } catch (Throwable t) { - context.failJob(t); - throw t; - } - } - - @Override - public void subtaskReset(int subtask, long checkpointId) { - Throwable rootCause = failedReasons.get(subtask); - LOG.error( - String.format("Subtask %d reset at checkpoint %d.", subtask, checkpointId), - rootCause); - } - - @Override - public void executionAttemptFailed( - int subtask, int attemptNumber, @Nullable Throwable throwable) { - failedReasons.put(subtask, throwable); - } - - @Override - public void executionAttemptReady( - int subtask, int attemptNumber, SubtaskGateway subtaskGateway) { - // do nothing - } - - private void 
handleGetEvolvedSchemaRequest( - GetEvolvedSchemaRequest getEvolvedSchemaRequest, - CompletableFuture response) { - LOG.info("Handling evolved schema request: {}", getEvolvedSchemaRequest); - int schemaVersion = getEvolvedSchemaRequest.getSchemaVersion(); - TableId tableId = getEvolvedSchemaRequest.getTableId(); - if (schemaVersion == GetEvolvedSchemaRequest.LATEST_SCHEMA_VERSION) { - response.complete( - wrap( - new GetEvolvedSchemaResponse( - schemaManager.getLatestEvolvedSchema(tableId).orElse(null)))); - } else { - try { - response.complete( - wrap( - new GetEvolvedSchemaResponse( - schemaManager.getEvolvedSchema(tableId, schemaVersion)))); - } catch (IllegalArgumentException iae) { - LOG.warn( - "Some client is requesting an non-existed evolved schema for table {} with version {}", - tableId, - schemaVersion); - response.complete(wrap(new GetEvolvedSchemaResponse(null))); - } - } - } - - private void handleGetOriginalSchemaRequest( - GetOriginalSchemaRequest getOriginalSchemaRequest, - CompletableFuture response) { - LOG.info("Handling original schema request: {}", getOriginalSchemaRequest); - int schemaVersion = getOriginalSchemaRequest.getSchemaVersion(); - TableId tableId = getOriginalSchemaRequest.getTableId(); - if (schemaVersion == GetOriginalSchemaRequest.LATEST_SCHEMA_VERSION) { - response.complete( - wrap( - new GetOriginalSchemaResponse( - schemaManager.getLatestOriginalSchema(tableId).orElse(null)))); - } else { - try { - response.complete( - wrap( - new GetOriginalSchemaResponse( - schemaManager.getOriginalSchema(tableId, schemaVersion)))); - } catch (IllegalArgumentException iae) { - LOG.warn( - "Some client is requesting an non-existed original schema for table {} with version {}", - tableId, - schemaVersion); - response.complete(wrap(new GetOriginalSchemaResponse(null))); - } - } - } - - // --------------------Only visible for test ----------------- - - @VisibleForTesting - public void handleApplyOriginalSchemaChangeEvent(SchemaChangeEvent schemaChangeEvent) { - schemaManager.applyOriginalSchemaChange(schemaChangeEvent); - } - - @VisibleForTesting - public void handleApplyEvolvedSchemaChangeRequest(SchemaChangeEvent schemaChangeEvent) { - schemaManager.applyEvolvedSchemaChange(schemaChangeEvent); - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryRequestHandler.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryRequestHandler.java deleted file mode 100644 index 2faba42caef..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryRequestHandler.java +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.runtime.operators.schema.coordinator; - -import org.apache.flink.cdc.common.annotation.Internal; -import org.apache.flink.cdc.common.event.AddColumnEvent; -import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; -import org.apache.flink.cdc.common.event.CreateTableEvent; -import org.apache.flink.cdc.common.event.DropColumnEvent; -import org.apache.flink.cdc.common.event.RenameColumnEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEventType; -import org.apache.flink.cdc.common.event.SchemaChangeEventWithPreSchema; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.exceptions.UnsupportedSchemaChangeEventException; -import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; -import org.apache.flink.cdc.common.schema.Column; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.sink.MetadataApplier; -import org.apache.flink.cdc.common.types.DataType; -import org.apache.flink.cdc.common.utils.Preconditions; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeProcessingResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResultRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeResultResponse; -import org.apache.flink.runtime.operators.coordination.CoordinationResponse; -import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static org.apache.flink.cdc.runtime.operators.schema.event.CoordinationResponseUtils.wrap; - -/** A handler to deal with all requests and events for {@link SchemaRegistry}. */ -@Internal -public class SchemaRegistryRequestHandler implements Closeable { - private static final Logger LOG = LoggerFactory.getLogger(SchemaRegistryRequestHandler.class); - - /** The {@link MetadataApplier} for every table. */ - private final MetadataApplier metadataApplier; - /** All active sink writers. */ - private final Set activeSinkWriters; - /** Schema manager holding schema for all tables. */ - private final SchemaManager schemaManager; - - private final SchemaDerivation schemaDerivation; - - /** - * Atomic flag indicating if current RequestHandler could accept more schema changes for now. - */ - private volatile RequestStatus schemaChangeStatus; - - private final Object schemaChangeRequestLock; - - private volatile Throwable currentChangeException; - private volatile List currentFinishedSchemaChanges; - private volatile List currentIgnoredSchemaChanges; - - /** Sink writers which have sent flush success events for the request. */ - private final ConcurrentHashMap> flushedSinkWriters; - - /** Executor service to execute schema change. 
*/ - private final ExecutorService schemaChangeThreadPool; - - private final SchemaChangeBehavior schemaChangeBehavior; - - private final OperatorCoordinator.Context context; - - private final int parallelism; - - public SchemaRegistryRequestHandler( - MetadataApplier metadataApplier, - SchemaManager schemaManager, - SchemaDerivation schemaDerivation, - SchemaChangeBehavior schemaChangeBehavior, - OperatorCoordinator.Context context) { - this.metadataApplier = metadataApplier; - this.schemaManager = schemaManager; - this.schemaDerivation = schemaDerivation; - this.schemaChangeBehavior = schemaChangeBehavior; - this.context = context; - - this.activeSinkWriters = ConcurrentHashMap.newKeySet(); - this.flushedSinkWriters = new ConcurrentHashMap<>(); - this.schemaChangeThreadPool = Executors.newSingleThreadExecutor(); - - this.currentFinishedSchemaChanges = new ArrayList<>(); - this.currentIgnoredSchemaChanges = new ArrayList<>(); - - this.schemaChangeStatus = RequestStatus.IDLE; - this.schemaChangeRequestLock = new Object(); - - // This check is meant to allow migration test pass since we don't have a valid - // `OperatorCoordinator.Context` in mocked environment. - this.parallelism = context != null ? context.currentParallelism() : 0; - } - - /** - * Handle the {@link SchemaChangeRequest} and wait for all sink subtasks flushing. - * - * @param request the received SchemaChangeRequest - */ - public void handleSchemaChangeRequest( - SchemaChangeRequest request, CompletableFuture response) { - - // We use nonce to identify each schema change request - long nonce = request.getNonce(); - - synchronized (schemaChangeRequestLock) { - // Make sure we handle the first request in the pending list to avoid out-of-order - // waiting and blocks checkpointing mechanism. - if (schemaChangeStatus == RequestStatus.IDLE) { - SchemaChangeEvent event = request.getSchemaChangeEvent(); - - // If this schema change event has been requested by another subTask, ignore it. - if (schemaManager.isOriginalSchemaChangeEventRedundant(event)) { - LOG.info("Event {} has been addressed before, ignoring it.", event); - clearCurrentSchemaChangeRequest(nonce); - LOG.info( - "SchemaChangeStatus is still IDLE for request {} due to duplicated request.", - request); - response.complete(wrap(SchemaChangeResponse.duplicate())); - return; - } - - if (activeSinkWriters.size() < parallelism) { - LOG.info( - "Not all active sink writers have been registered. Current {}, expected {}.", - activeSinkWriters.size(), - parallelism); - response.complete(wrap(SchemaChangeResponse.waitingForFlush())); - return; - } - - if (!activeSinkWriters.equals(flushedSinkWriters.get(nonce))) { - LOG.info( - "Not all active sink writers have completed flush (nonce: {}). Flushed writers: {}, expected: {}.", - nonce, - flushedSinkWriters.get(nonce), - activeSinkWriters); - response.complete(wrap(SchemaChangeResponse.waitingForFlush())); - return; - } - - schemaManager.applyOriginalSchemaChange(event); - List derivedSchemaChangeEvents = - calculateDerivedSchemaChangeEvents(request.getSchemaChangeEvent()); - - // If this schema change event is filtered out by LENIENT mode or merging table - // route strategies, ignore it. 
- if (derivedSchemaChangeEvents.isEmpty()) { - LOG.info("Event {} is omitted from sending to downstream, ignoring it.", event); - clearCurrentSchemaChangeRequest(nonce); - LOG.info( - "SchemaChangeStatus is still IDLE for request {} due to ignored request.", - request); - - response.complete(wrap(SchemaChangeResponse.ignored())); - return; - } - - LOG.info( - "SchemaChangeStatus switched from IDLE to APPLYING, other requests will be blocked."); - // This request has been accepted. - schemaChangeStatus = RequestStatus.APPLYING; - - // Backfill pre-schema info for sink applying - derivedSchemaChangeEvents.forEach( - e -> { - if (e instanceof SchemaChangeEventWithPreSchema) { - SchemaChangeEventWithPreSchema pe = - (SchemaChangeEventWithPreSchema) e; - if (!pe.hasPreSchema()) { - schemaManager - .getLatestEvolvedSchema(pe.tableId()) - .ifPresent(pe::fillPreSchema); - } - } - }); - - response.complete(wrap(SchemaChangeResponse.accepted(derivedSchemaChangeEvents))); - - LOG.info( - "All sink subtask have flushed for table {}. Start to apply schema change request {}.", - request.getTableId().toString(), - request); - schemaChangeThreadPool.submit( - () -> applySchemaChange(request.getTableId(), derivedSchemaChangeEvents)); - } else { - response.complete(wrap(SchemaChangeResponse.busy())); - } - } - } - - /** - * Apply the schema change to the external system. - * - * @param tableId the table need to change schema - * @param derivedSchemaChangeEvents list of the schema changes - */ - private void applySchemaChange( - TableId tableId, List derivedSchemaChangeEvents) { - for (SchemaChangeEvent changeEvent : derivedSchemaChangeEvents) { - if (changeEvent.getType() != SchemaChangeEventType.CREATE_TABLE) { - if (schemaChangeBehavior == SchemaChangeBehavior.IGNORE) { - currentIgnoredSchemaChanges.add(changeEvent); - continue; - } - } - if (!metadataApplier.acceptsSchemaEvolutionType(changeEvent.getType())) { - LOG.info("Ignored schema change {} to table {}.", changeEvent, tableId); - currentIgnoredSchemaChanges.add(changeEvent); - } else { - try { - metadataApplier.applySchemaChange(changeEvent); - LOG.info("Applied schema change {} to table {}.", changeEvent, tableId); - schemaManager.applyEvolvedSchemaChange(changeEvent); - currentFinishedSchemaChanges.add(changeEvent); - } catch (Throwable t) { - LOG.error( - "Failed to apply schema change {} to table {}. Caused by: {}", - changeEvent, - tableId, - t); - if (!shouldIgnoreException(t)) { - currentChangeException = t; - break; - } else { - LOG.warn( - "Failed to apply event {}, but keeps running in tolerant mode. Caused by: {}", - changeEvent, - t); - } - } - } - } - Preconditions.checkState( - schemaChangeStatus == RequestStatus.APPLYING, - "Illegal schemaChangeStatus state: should be APPLYING before applySchemaChange finishes, not " - + schemaChangeStatus); - schemaChangeStatus = RequestStatus.FINISHED; - LOG.info("SchemaChangeStatus switched from APPLYING to FINISHED."); - } - - /** - * Register a sink subtask. - * - * @param sinkSubtask the sink subtask to register - */ - public void registerSinkWriter(int sinkSubtask) { - LOG.info("Register sink subtask {}.", sinkSubtask); - activeSinkWriters.add(sinkSubtask); - } - - /** - * Record flushed sink subtasks after receiving FlushSuccessEvent. 
- * - * @param sinkSubtask the sink subtask succeed flushing - */ - public void flushSuccess(int sinkSubtask, long nonce) { - synchronized (schemaChangeRequestLock) { - if (!flushedSinkWriters.containsKey(nonce)) { - flushedSinkWriters.put(nonce, ConcurrentHashMap.newKeySet()); - } - flushedSinkWriters.get(nonce).add(sinkSubtask); - } - } - - public void getSchemaChangeResult( - SchemaChangeResultRequest request, CompletableFuture response) { - Preconditions.checkState( - schemaChangeStatus != RequestStatus.IDLE, - "Illegal schemaChangeStatus: should not be IDLE before getting schema change request results."); - if (schemaChangeStatus == RequestStatus.FINISHED) { - schemaChangeStatus = RequestStatus.IDLE; - LOG.info( - "SchemaChangeStatus switched from FINISHED to IDLE. (nonce: {})", - request.getNonce()); - - // This request has been finished, return it and prepare for the next request - List finishedEvents = - clearCurrentSchemaChangeRequest(request.getNonce()); - SchemaChangeResultResponse resultResponse = - new SchemaChangeResultResponse(finishedEvents); - response.complete(wrap(resultResponse)); - } else { - // Still working on schema change request, waiting it - response.complete(wrap(new SchemaChangeProcessingResponse())); - } - } - - @Override - public void close() throws IOException { - if (schemaChangeThreadPool != null) { - schemaChangeThreadPool.shutdown(); - } - } - - private List calculateDerivedSchemaChangeEvents(SchemaChangeEvent event) { - if (SchemaChangeBehavior.LENIENT.equals(schemaChangeBehavior)) { - return schemaDerivation.applySchemaChange(event).stream() - .flatMap(evt -> lenientizeSchemaChangeEvent(evt).stream()) - .collect(Collectors.toList()); - } else { - return schemaDerivation.applySchemaChange(event); - } - } - - private List lenientizeSchemaChangeEvent(SchemaChangeEvent event) { - if (event instanceof CreateTableEvent) { - return Collections.singletonList(event); - } - TableId tableId = event.tableId(); - Schema evolvedSchema = - schemaManager - .getLatestEvolvedSchema(tableId) - .orElseThrow( - () -> - new IllegalStateException( - "Evolved schema does not exist, not ready for schema change event " - + event)); - switch (event.getType()) { - case ADD_COLUMN: - { - AddColumnEvent addColumnEvent = (AddColumnEvent) event; - return Collections.singletonList( - new AddColumnEvent( - tableId, - addColumnEvent.getAddedColumns().stream() - .map( - col -> - new AddColumnEvent.ColumnWithPosition( - Column.physicalColumn( - col.getAddColumn() - .getName(), - col.getAddColumn() - .getType() - .nullable(), - col.getAddColumn() - .getComment(), - col.getAddColumn() - .getDefaultValueExpression()))) - .collect(Collectors.toList()))); - } - case DROP_COLUMN: - { - DropColumnEvent dropColumnEvent = (DropColumnEvent) event; - Map convertNullableColumns = - dropColumnEvent.getDroppedColumnNames().stream() - .map(evolvedSchema::getColumn) - .flatMap(e -> e.map(Stream::of).orElse(Stream.empty())) - .filter(col -> !col.getType().isNullable()) - .collect( - Collectors.toMap( - Column::getName, - column -> column.getType().nullable())); - - if (convertNullableColumns.isEmpty()) { - return Collections.emptyList(); - } else { - return Collections.singletonList( - new AlterColumnTypeEvent(tableId, convertNullableColumns)); - } - } - case RENAME_COLUMN: - { - RenameColumnEvent renameColumnEvent = (RenameColumnEvent) event; - List appendColumns = new ArrayList<>(); - Map convertNullableColumns = new HashMap<>(); - renameColumnEvent - .getNameMapping() - .forEach( - (key, value) -> { - 
Column column = - evolvedSchema - .getColumn(key) - .orElseThrow( - () -> - new IllegalArgumentException( - "Non-existed column " - + key - + " in evolved schema.")); - if (!column.getType().isNullable()) { - // It's a not-nullable column, we need to cast it to - // nullable first - convertNullableColumns.put( - key, column.getType().nullable()); - } - appendColumns.add( - new AddColumnEvent.ColumnWithPosition( - Column.physicalColumn( - value, - column.getType().nullable(), - column.getComment(), - column - .getDefaultValueExpression()))); - }); - - List events = new ArrayList<>(); - events.add(new AddColumnEvent(tableId, appendColumns)); - if (!convertNullableColumns.isEmpty()) { - events.add(new AlterColumnTypeEvent(tableId, convertNullableColumns)); - } - return events; - } - default: - return Collections.singletonList(event); - } - } - - private boolean shouldIgnoreException(Throwable throwable) { - // In IGNORE mode, will never try to apply schema change events - // In EVOLVE and LENIENT mode, such failure will not be tolerated - // In EXCEPTION mode, an exception will be thrown once captured - return (throwable instanceof UnsupportedSchemaChangeEventException) - && (schemaChangeBehavior == SchemaChangeBehavior.TRY_EVOLVE); - } - - private List clearCurrentSchemaChangeRequest(long nonce) { - if (currentChangeException != null) { - context.failJob( - new RuntimeException("Failed to apply schema change.", currentChangeException)); - } - List finishedSchemaChanges = - new ArrayList<>(currentFinishedSchemaChanges); - flushedSinkWriters.remove(nonce); - currentFinishedSchemaChanges.clear(); - currentIgnoredSchemaChanges.clear(); - currentChangeException = null; - return finishedSchemaChanges; - } - - // Schema change event state could transfer in the following way: - // - - // -------- - // | IDLE | -------------------A - // -------- | - // ^ | - // C | - // \ v - // ------------ ------------ - // | FINISHED | <-- B -- | APPLYING | - // ------------ ------------ - // - // A: When a request came to an idling request handler. Only possible when registry is IDLE, - // and it has collected all FlushEvents from sink writers. - // B: When schema change application finishes (successfully or with exceptions) - // C: When current schema change request result has been retrieved by SchemaOperator, and be - // ready for the next request. - private enum RequestStatus { - IDLE, - APPLYING, - FINISHED - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinator.java new file mode 100755 index 00000000000..061ba613afa --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinator.java @@ -0,0 +1,437 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.distributed; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.common.utils.Preconditions; +import org.apache.flink.cdc.common.utils.SchemaMergingUtils; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaDerivator; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.distributed.SchemaChangeRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.distributed.SchemaChangeResponse; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.util.FlinkRuntimeException; + +import org.apache.flink.shaded.guava31.com.google.common.collect.HashBasedTable; +import org.apache.flink.shaded.guava31.com.google.common.collect.HashMultimap; +import org.apache.flink.shaded.guava31.com.google.common.collect.Multimap; +import org.apache.flink.shaded.guava31.com.google.common.collect.Table; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.wrap; + +/** Coordinator node for {@link SchemaOperator}. Registry actor in Map-Reduce Topology. */ +public class SchemaCoordinator extends SchemaRegistry { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaCoordinator.class); + + /** Atomic finite state machine to track global schema reducing state. */ + private transient AtomicReference reducerStatus; + + /** Request futures from pending schema mappers. 
*/ + private transient Map< + Integer, Tuple2>> + pendingRequests; + + /** + * Transient upstream table schema. The second arity is source partition ID, because in + * Map-Reduce topology, schemas might vary among partitions, so we can't rely on {@code + * schemaManager} to store original schemas. + */ + private transient Table upstreamSchemaTable; + + /** + * This number was kept in-sync to indicate the number of global schema reducing requests that + * have been processed. Used for filtering out late-coming BlockUpstreamRequest if + * requestSchemaReduce was already emitted in `processElement` method. + */ + private transient AtomicInteger schemaMapperSeqNum; + + /** + * In Map-Reduce topology, one schema change event will be broadcast N-times (N = downstream + * parallelism). We need to effectively ignore duplicate ones since not all {@link + * SchemaChangeEvent}s are idempotent. + */ + private transient Multimap, Integer> + alreadyHandledSchemaChangeEvents; + + public SchemaCoordinator( + String operatorName, + OperatorCoordinator.Context context, + ExecutorService coordinatorExecutor, + MetadataApplier metadataApplier, + List routingRules, + Duration rpcTimeout) { + super( + context, + operatorName, + coordinatorExecutor, + metadataApplier, + routingRules, + rpcTimeout); + } + + // ----------------- + // Lifecycle methods + // ----------------- + @Override + public void start() throws Exception { + super.start(); + this.reducerStatus = new AtomicReference<>(RequestStatus.IDLE); + this.pendingRequests = new ConcurrentHashMap<>(); + this.upstreamSchemaTable = HashBasedTable.create(); + this.schemaMapperSeqNum = new AtomicInteger(0); + this.alreadyHandledSchemaChangeEvents = HashMultimap.create(); + LOG.info( + "Started SchemaRegistry for {}. Parallelism: {}", operatorName, currentParallelism); + } + + // -------------------------- + // Checkpoint related methods + // -------------------------- + @Override + protected void snapshot(CompletableFuture resultFuture) throws Exception { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos)) { + // Serialize SchemaManager + int schemaManagerSerializerVersion = SchemaManager.SERIALIZER.getVersion(); + out.writeInt(schemaManagerSerializerVersion); + byte[] serializedSchemaManager = SchemaManager.SERIALIZER.serialize(schemaManager); + out.writeInt(serializedSchemaManager.length); + out.write(serializedSchemaManager); + resultFuture.complete(baos.toByteArray()); + } + } + + @Override + protected void restore(byte[] checkpointData) throws Exception { + try (ByteArrayInputStream bais = new ByteArrayInputStream(checkpointData); + DataInputStream in = new DataInputStream(bais)) { + int schemaManagerSerializerVersion = in.readInt(); + int length = in.readInt(); + byte[] serializedSchemaManager = new byte[length]; + in.readFully(serializedSchemaManager); + schemaManager = + SchemaManager.SERIALIZER.deserialize( + schemaManagerSerializerVersion, serializedSchemaManager); + } + } + + // ------------------------- + // Event handler entrances (for schema mappers and sink operators) + // ------------------------- + @Override + protected void handleGetOriginalSchemaRequest( + GetOriginalSchemaRequest request, + CompletableFuture responseFuture) { + throw new UnsupportedOperationException( + "In Map-Reduce topology, there's no centralized upstream schema table since they may evolve independently in various partitions."); + } + + @Override + protected void handleCustomCoordinationRequest( + 
CoordinationRequest request, CompletableFuture responseFuture) + throws Exception { + if (request instanceof SchemaChangeRequest) { + handleReduceSchemaRequest((SchemaChangeRequest) request, responseFuture); + } else { + throw new UnsupportedOperationException( + "Unknown coordination request type: " + request); + } + } + + @Override + protected void handleUnrecoverableError(String taskDescription, Throwable t) { + super.handleUnrecoverableError(taskDescription, t); + LOG.info("Current upstream table state: {}", upstreamSchemaTable); + pendingRequests.forEach( + (index, tuple) -> { + tuple.f1.completeExceptionally(t); + }); + } + + // ------------------------- + // Schema reducing logic + // ------------------------- + + private void handleReduceSchemaRequest( + SchemaChangeRequest request, CompletableFuture responseFuture) + throws Exception { + LOG.info("Reducer received schema reduce request {}.", request); + if (!request.isNoOpRequest()) { + LOG.info("It's not an align request, will try to deduplicate."); + int eventSourcePartitionId = request.getSourceSubTaskId(); + int handlingSinkSubTaskId = request.getSinkSubTaskId(); + SchemaChangeEvent schemaChangeEvent = request.getSchemaChangeEvent(); + Tuple2 uniqueKey = + Tuple2.of(eventSourcePartitionId, schemaChangeEvent); + // Due to the existence of Partitioning Operator, any upstream event will be broadcast + // to sink N (N = sink parallelism) times. + // Only the first one should take effect, so we rewrite any other duplicated requests as + // a no-op align request. + alreadyHandledSchemaChangeEvents.put(uniqueKey, handlingSinkSubTaskId); + Collection reportedSinkSubTasks = + alreadyHandledSchemaChangeEvents.get(uniqueKey); + if (reportedSinkSubTasks.size() == 1) { + LOG.info("It's a new request for {}, will handle it", uniqueKey); + updateUpstreamSchemaTable( + schemaChangeEvent.tableId(), + request.getSourceSubTaskId(), + schemaChangeEvent); + } else { + LOG.info( + "It's an already handled event {}. It has been handled by {}", + uniqueKey, + reportedSinkSubTasks); + request = SchemaChangeRequest.createNoOpRequest(handlingSinkSubTaskId); + } + // Moreover, if we've collected all sink subTasks' request, remove it from memory since + // no more will be possible. + if (reportedSinkSubTasks.size() == currentParallelism) { + LOG.info( + "All sink subTasks ({}) have already reported request {}. Remove it out of tracking.", + reportedSinkSubTasks, + uniqueKey); + alreadyHandledSchemaChangeEvents.removeAll(request); + } + } + + pendingRequests.put(request.getSinkSubTaskId(), Tuple2.of(request, responseFuture)); + + if (pendingRequests.size() == 1) { + Preconditions.checkState( + reducerStatus.compareAndSet( + RequestStatus.IDLE, RequestStatus.WAITING_FOR_FLUSH), + "Unexpected reducer status: " + reducerStatus.get()); + LOG.info( + "Received the very-first schema reduce request {}. Switching from IDLE to WAITING_FOR_FLUSH.", + request); + } + + // No else if, since currentParallelism might be == 1 + if (pendingRequests.size() == currentParallelism) { + Preconditions.checkState( + reducerStatus.compareAndSet( + RequestStatus.WAITING_FOR_FLUSH, RequestStatus.EVOLVING), + "Unexpected reducer status: " + reducerStatus.get()); + LOG.info( + "Received the last required schema reduce request {}. Switching from WAITING_FOR_FLUSH to EVOLVING.", + request); + startSchemaChangesReduce(); + } + } + + /** + * Tries to apply schema change event {@code schemaChangeEvent} to the combination of {@code + * tableId} and {@code sourcePartition}. 
Returns {@code true} if schema got changed, or {@code + * false} if nothing gets touched. + */ + private void updateUpstreamSchemaTable( + TableId tableId, int sourcePartition, SchemaChangeEvent schemaChangeEvent) { + Schema oldSchema = upstreamSchemaTable.get(tableId, sourcePartition); + upstreamSchemaTable.put( + tableId, + sourcePartition, + SchemaUtils.applySchemaChangeEvent(oldSchema, schemaChangeEvent)); + } + + private void startSchemaChangesReduce() throws TimeoutException { + LOG.info("Starting to reduce schema. "); + loopWhen( + () -> flushedSinkWriters.size() < currentParallelism, + () -> + LOG.info( + "Not all sink writers have successfully flushed. Expected {}, actual {}", + currentParallelism, + flushedSinkWriters), + rpcTimeout, + Duration.ofMillis(100)); + + LOG.info("All flushed. Going to reduce schema for pending requests: {}", pendingRequests); + flushedSinkWriters.clear(); + + // Deduce what schema change events should be applied to sink table + List deducedSchemaChangeEvents = deduceEvolvedSchemaChanges(); + + // And tries to apply it to external system + List successfullyAppliedSchemaChangeEvents = new ArrayList<>(); + for (SchemaChangeEvent appliedSchemaChangeEvent : deducedSchemaChangeEvents) { + if (applyAndUpdateEvolvedSchemaChange(appliedSchemaChangeEvent)) { + successfullyAppliedSchemaChangeEvents.add(appliedSchemaChangeEvent); + } + } + + // Then, we increment the seqNum, broadcast affected schema changes to mapper, and release + // upstream + int nextSeqNum = schemaMapperSeqNum.incrementAndGet(); + pendingRequests.forEach( + (subTaskId, tuple) -> { + LOG.info("Reducer finishes pending future from {}", subTaskId); + tuple.f1.complete( + wrap( + new SchemaChangeResponse( + successfullyAppliedSchemaChangeEvents, nextSeqNum))); + }); + + pendingRequests.clear(); + + LOG.info("Finished schema evolving. Switching from EVOLVING to IDLE."); + Preconditions.checkState( + reducerStatus.compareAndSet(RequestStatus.EVOLVING, RequestStatus.IDLE), + "RequestStatus should be EVOLVING when schema reducing finishes."); + } + + private List deduceEvolvedSchemaChanges() { + List validSchemaReduceRequests = + pendingRequests.values().stream() + .map(e -> e.f0) + .filter( + request -> + !request.isNoOpRequest()) // Ignore alignment only requests + .collect(Collectors.toList()); + + // Firstly, based on changed upstream tables, infer a set of sink tables that might be + // affected by this event. Schema changes will be derived individually for each sink table. + Set affectedSinkTableIds = + SchemaDerivator.getAffectedEvolvedTables( + router, + validSchemaReduceRequests.stream() + .map(rsr -> rsr.getSchemaChangeEvent().tableId()) + .collect(Collectors.toSet())); + + List evolvedSchemaChanges = new ArrayList<>(); + + // For each affected sink table, we may... + for (TableId affectedSinkTableId : affectedSinkTableIds) { + + Schema currentSinkSchema = + schemaManager.getLatestEvolvedSchema(affectedSinkTableId).orElse(null); + + // ... reversely look up this affected sink table's upstream dependency. + Set upstreamDependencies = + SchemaDerivator.reverseLookupDependingUpstreamTables( + router, affectedSinkTableId, upstreamSchemaTable); + + Preconditions.checkState( + !upstreamDependencies.isEmpty(), + "An affected sink table's upstream dependency cannot be empty."); + + // Then, grab all upstream schemas from all known partitions and merge them. 
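+ // The merge relies on SchemaMergingUtils#getLeastCommonSchema, so the result is the widest
+ // schema covering the current sink schema and every reported partition schema; e.g. a column
+ // reported as INT by one partition and as BIGINT by another would typically be widened to BIGINT.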
+ Set toBeMergedSchemas = + SchemaDerivator.reverseLookupDependingUpstreamSchemas( + router, affectedSinkTableId, upstreamSchemaTable); + + // In reducing mode, schema will never be narrowed because current schema is always one + // of the merging base. Notice that current schema might be NULL if it's the first + // time we met a CreateTableEvent. + Schema mergedSchema = currentSinkSchema; + for (Schema toBeMergedSchema : toBeMergedSchemas) { + mergedSchema = + SchemaMergingUtils.getLeastCommonSchema(mergedSchema, toBeMergedSchema); + } + + // Detect what schema changes we need to apply to get expected sink table. + List localEvolvedSchemaChanges = + SchemaMergingUtils.getSchemaDifference( + affectedSinkTableId, currentSinkSchema, mergedSchema); + + // Finally, we normalize schema change events, including rewriting events by current + // schema change behavior configuration, dropping explicitly excluded schema change + // event types. + evolvedSchemaChanges.addAll( + SchemaDerivator.normalizeSchemaChangeEvents( + currentSinkSchema, + localEvolvedSchemaChanges, + SchemaChangeBehavior.LENIENT, + metadataApplier)); + } + + return evolvedSchemaChanges; + } + + private boolean applyAndUpdateEvolvedSchemaChange(SchemaChangeEvent schemaChangeEvent) { + try { + metadataApplier.applySchemaChange(schemaChangeEvent); + schemaManager.applyEvolvedSchemaChange(schemaChangeEvent); + LOG.info( + "Successfully applied schema change event {} to external system.", + schemaChangeEvent); + return true; + } catch (Throwable t) { + handleUnrecoverableError( + "Apply schema change event - " + schemaChangeEvent, + new FlinkRuntimeException( + "Failed to apply schema change event " + schemaChangeEvent + ".", t)); + context.failJob(t); + throw t; + } + } + + // ------------------------- + // Utilities + // ------------------------- + + /** + * {@code IDLE}: Initial idling state, ready for requests.
+ * {@code WAITING_FOR_FLUSH}: Waiting for all mappers to block & collecting enough FlushEvents. + * <br>
+ * {@code EVOLVING}: Applying schema change to sink. + */ + private enum RequestStatus { + IDLE, + WAITING_FOR_FLUSH, + EVOLVING + } + + @VisibleForTesting + public void emplaceOriginalSchema(TableId tableId, Integer subTaskId, Schema schema) { + upstreamSchemaTable.put(tableId, subTaskId, schema); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinatorProvider.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinatorProvider.java new file mode 100755 index 00000000000..392906a6cff --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaCoordinatorProvider.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.distributed; + +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.runtime.operators.schema.common.CoordinatorExecutorThreadFactory; +import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; + +import java.time.Duration; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** Provider for {@link SchemaCoordinator}. 
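+ * Instantiated by the Flink runtime on the JobManager side: it builds a dedicated single-threaded coordinator executor and creates the distributed-topology {@link SchemaCoordinator} for the given operator ID.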
*/ +public class SchemaCoordinatorProvider implements OperatorCoordinator.Provider { + private static final long serialVersionUID = 1L; + + private final OperatorID operatorID; + private final String operatorName; + private final MetadataApplier metadataApplier; + private final List routingRules; + private final Duration rpcTimeout; + + public SchemaCoordinatorProvider( + OperatorID operatorID, + String operatorName, + MetadataApplier metadataApplier, + List routingRules, + Duration rpcTimeout) { + this.operatorID = operatorID; + this.operatorName = operatorName; + this.metadataApplier = metadataApplier; + this.routingRules = routingRules; + this.rpcTimeout = rpcTimeout; + } + + @Override + public OperatorID getOperatorId() { + return operatorID; + } + + @Override + public OperatorCoordinator create(OperatorCoordinator.Context context) throws Exception { + CoordinatorExecutorThreadFactory coordinatorThreadFactory = + new CoordinatorExecutorThreadFactory( + "schema-evolution-coordinator", context.getUserCodeClassloader()); + ExecutorService coordinatorExecutor = + Executors.newSingleThreadExecutor(coordinatorThreadFactory); + return new SchemaCoordinator( + operatorName, + context, + coordinatorExecutor, + metadataApplier, + routingRules, + rpcTimeout); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperator.java new file mode 100755 index 00000000000..2eafb65c801 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperator.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.distributed; + +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.FlushEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaDerivator; +import org.apache.flink.cdc.runtime.operators.schema.common.TableIdRouter; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.event.distributed.SchemaChangeRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.distributed.SchemaChangeResponse; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; +import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; +import org.apache.flink.runtime.operators.coordination.OperatorEvent; +import org.apache.flink.runtime.operators.coordination.OperatorEventHandler; +import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.StreamTask; +import org.apache.flink.util.SerializedValue; + +import org.apache.flink.shaded.guava31.com.google.common.collect.HashBasedTable; +import org.apache.flink.shaded.guava31.com.google.common.collect.Table; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +/** This operator merges upstream inferred schema into a centralized Schema Registry. */ +public class SchemaOperator extends AbstractStreamOperator + implements OneInputStreamOperator, + OperatorEventHandler, + Serializable { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaOperator.class); + + // Final fields that are set upon construction + private final Duration rpcTimeOut; + private final String timezone; + private final List routingRules; + + public SchemaOperator(List routingRules, Duration rpcTimeOut, String timezone) { + this.routingRules = routingRules; + this.rpcTimeOut = rpcTimeOut; + this.timezone = timezone; + } + + // Transient fields that are set when operator is running + private transient TaskOperatorEventGateway toCoordinator; + private transient int subTaskId; + + // Records TableId and its integer. 
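+ // upstreamSchemaTable caches the latest known upstream schema per (TableId, source partition),
+ // while evolvedSchemaMap caches the latest evolved schema per routed sink TableId.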
+ private transient volatile Table upstreamSchemaTable; + private transient volatile Map evolvedSchemaMap; + private transient TableIdRouter tableIdRouter; + private transient volatile int schemaMapperSeqNum; + + @Override + public void open() throws Exception { + super.open(); + subTaskId = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + upstreamSchemaTable = HashBasedTable.create(); + evolvedSchemaMap = new HashMap<>(); + tableIdRouter = new TableIdRouter(routingRules); + schemaMapperSeqNum = 0; + } + + @Override + public void setup( + StreamTask containingTask, + StreamConfig config, + Output> output) { + super.setup(containingTask, config, output); + this.toCoordinator = containingTask.getEnvironment().getOperatorCoordinatorEventGateway(); + } + + @Override + public void processElement(StreamRecord streamRecord) throws Exception { + // Unpack partitioned events + PartitioningEvent partitioningEvent = streamRecord.getValue(); + Event event = partitioningEvent.getPayload(); + int sourcePartition = partitioningEvent.getSourcePartition(); + + if (event instanceof SchemaChangeEvent) { + SchemaChangeEvent schemaChangeEvent = (SchemaChangeEvent) event; + TableId tableId = schemaChangeEvent.tableId(); + + // First, update upstream schema map unconditionally and it will never fail + Schema beforeSchema = upstreamSchemaTable.get(tableId, sourcePartition); + Schema afterSchema = + SchemaUtils.applySchemaChangeEvent(beforeSchema, schemaChangeEvent); + upstreamSchemaTable.put(tableId, sourcePartition, afterSchema); + + // Then, notify this information to the reducer + requestSchemaReduce( + new SchemaChangeRequest(sourcePartition, subTaskId, schemaChangeEvent)); + } else if (event instanceof DataChangeEvent) { + DataChangeEvent dataChangeEvent = (DataChangeEvent) event; + TableId tableId = dataChangeEvent.tableId(); + + // First, we obtain the upstream schema corresponding to this data change event + Schema upstreamSchema = + upstreamSchemaTable.get(dataChangeEvent.tableId(), sourcePartition); + + // Then, for each routing terminus, coerce data records to the expected schema + for (TableId sinkTableId : tableIdRouter.route(tableId)) { + Schema evolvedSchema = evolvedSchemaMap.get(sinkTableId); + + DataChangeEvent coercedDataRecord = + SchemaDerivator.coerceDataRecord( + timezone, + DataChangeEvent.route(dataChangeEvent, sinkTableId), + upstreamSchema, + evolvedSchema) + .orElseThrow( + () -> + new IllegalStateException( + String.format( + "Unable to coerce data record from %s (schema: %s) to %s (schema: %s)", + tableId, + upstreamSchema, + sinkTableId, + evolvedSchema))); + output.collect(new StreamRecord<>(coercedDataRecord)); + } + } else { + throw new IllegalStateException( + subTaskId + "> SchemaMapper received an unexpected event: " + event); + } + } + + @Override + public void handleOperatorEvent(OperatorEvent event) { + throw new IllegalArgumentException("Unexpected operator event: " + event); + } + + private void requestSchemaReduce(SchemaChangeRequest reduceSchemaRequest) { + LOG.info("{}> Sent FlushEvent to downstream...", subTaskId); + output.collect(new StreamRecord<>(FlushEvent.ofAll())); + + LOG.info("{}> Sending reduce request...", subTaskId); + SchemaChangeResponse response = sendRequestToCoordinator(reduceSchemaRequest); + + LOG.info("{}> Reduce request response: {}", subTaskId, response); + + // Update local evolved schema cache + response.getReducedSchemaResult() + .forEach( + schemaChangeEvent -> + evolvedSchemaMap.compute( + schemaChangeEvent.tableId(), + (tableId, 
schema) -> + SchemaUtils.applySchemaChangeEvent( + schema, schemaChangeEvent))); + + // And emit schema change events to downstream + response.getReducedSchemaResult().forEach(evt -> output.collect(new StreamRecord<>(evt))); + + schemaMapperSeqNum = response.getReduceSeqNum(); + LOG.info( + "{}> Successfully updated evolved schema cache. Current state: {} at version {}", + subTaskId, + evolvedSchemaMap, + schemaMapperSeqNum); + } + + private + RESPONSE sendRequestToCoordinator(REQUEST request) { + try { + CompletableFuture responseFuture = + toCoordinator.sendRequestToCoordinator( + getOperatorID(), new SerializedValue<>(request)); + return CoordinationResponseUtils.unwrap( + responseFuture.get(rpcTimeOut.toMillis(), TimeUnit.MILLISECONDS)); + } catch (Exception e) { + throw new IllegalStateException( + "Failed to send request to coordinator: " + request.toString(), e); + } + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperatorFactory.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperatorFactory.java new file mode 100755 index 00000000000..6302f198769 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaOperatorFactory.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.distributed; + +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; +import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.runtime.operators.coordination.OperatorEventDispatcher; +import org.apache.flink.streaming.api.operators.CoordinatedOperatorFactory; +import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory; +import org.apache.flink.streaming.api.operators.SimpleOperatorFactory; +import org.apache.flink.streaming.api.operators.StreamOperator; +import org.apache.flink.streaming.api.operators.StreamOperatorParameters; + +import java.time.Duration; +import java.util.List; + +/** Factory to create {@link SchemaOperator}. 
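+ * Besides creating the operator, it registers the operator instance as the handler for coordinator events and exposes the matching {@link SchemaCoordinatorProvider}, so the operator and its coordinator share the same operator ID.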
*/ +public class SchemaOperatorFactory extends SimpleOperatorFactory + implements CoordinatedOperatorFactory, + OneInputStreamOperatorFactory { + private static final long serialVersionUID = 1L; + + private final MetadataApplier metadataApplier; + private final List routingRules; + private final Duration rpcTimeout; + + public SchemaOperatorFactory( + MetadataApplier metadataApplier, + List routingRules, + Duration rpcTimeout, + String timezone) { + super(new SchemaOperator(routingRules, rpcTimeout, timezone)); + this.metadataApplier = metadataApplier; + this.routingRules = routingRules; + this.rpcTimeout = rpcTimeout; + } + + @Override + @SuppressWarnings("unchecked") + public > T createStreamOperator( + StreamOperatorParameters parameters) { + SchemaOperator mapper = super.createStreamOperator(parameters); + final OperatorID operatorId = parameters.getStreamConfig().getOperatorID(); + final OperatorEventDispatcher eventDispatcher = parameters.getOperatorEventDispatcher(); + eventDispatcher.registerEventHandler(operatorId, mapper); + return (T) mapper; + } + + @Override + public OperatorCoordinator.Provider getCoordinatorProvider( + String operatorName, OperatorID operatorID) { + return new SchemaCoordinatorProvider( + operatorID, operatorName, metadataApplier, routingRules, rpcTimeout); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeProcessingResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeProcessingResponse.java deleted file mode 100644 index bf8d84f35a6..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeProcessingResponse.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.runtime.operators.schema.event; - -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.runtime.operators.coordination.CoordinationResponse; - -/** - * The response for {@link SchemaChangeResultRequest} from {@link SchemaRegistry} to {@link - * SchemaOperator} if not apply {@link SchemaChangeEvent} in time. 
- */ -public class SchemaChangeProcessingResponse implements CoordinationResponse { - - private static final long serialVersionUID = 1L; -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultRequest.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultRequest.java deleted file mode 100644 index 8160494ce9f..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultRequest.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.runtime.operators.schema.event; - -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.runtime.operators.coordination.CoordinationRequest; - -/** - * request from {@link SchemaOperator} to {@link SchemaRegistry} for getting result of applying - * schema change. - */ -public class SchemaChangeResultRequest implements CoordinationRequest { - - private static final long serialVersionUID = 1L; - - /** - * Nonce code to distinguish flush events corresponding to each schema change event from - * different subTasks. - */ - private final long nonce; - - public SchemaChangeResultRequest(long nonce) { - this.nonce = nonce; - } - - public long getNonce() { - return nonce; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - SchemaChangeResultRequest that = (SchemaChangeResultRequest) o; - return nonce == that.nonce; - } - - @Override - public int hashCode() { - return Long.hashCode(nonce); - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultResponse.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultResponse.java deleted file mode 100644 index 7039ef086ec..00000000000 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/event/SchemaChangeResultResponse.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.runtime.operators.schema.event; - -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.runtime.operators.coordination.CoordinationResponse; - -import java.util.List; -import java.util.Objects; - -/** - * The response for {@link SchemaChangeResultRequest} from {@link SchemaRegistry} to {@link - * SchemaOperator}. - */ -public class SchemaChangeResultResponse implements CoordinationResponse { - - private static final long serialVersionUID = 1L; - - /** - * Whether the SchemaOperator need to buffer data and the SchemaOperatorCoordinator need to wait - * for flushing. - */ - private final List finishedSchemaChangeEvents; - - public SchemaChangeResultResponse(List finishedSchemaChangeEvents) { - this.finishedSchemaChangeEvents = finishedSchemaChangeEvents; - } - - public List getFinishedSchemaChangeEvents() { - return finishedSchemaChangeEvents; - } - - @Override - public String toString() { - return "ReleaseUpstreamResponse{" - + "finishedSchemaChangeEvents=" - + finishedSchemaChangeEvents - + '}'; - } - - @Override - public boolean equals(Object object) { - if (this == object) { - return true; - } - if (object == null || getClass() != object.getClass()) { - return false; - } - SchemaChangeResultResponse that = (SchemaChangeResultResponse) object; - return Objects.equals(finishedSchemaChangeEvents, that.finishedSchemaChangeEvents); - } - - @Override - public int hashCode() { - return Objects.hash(finishedSchemaChangeEvents); - } -} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinator.java new file mode 100644 index 00000000000..ff50a841766 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinator.java @@ -0,0 +1,481 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.regular; + +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.exceptions.SchemaEvolveException; +import org.apache.flink.cdc.common.exceptions.UnsupportedSchemaChangeEventException; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.common.utils.Preconditions; +import org.apache.flink.cdc.common.utils.SchemaMergingUtils; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaDerivator; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaManager; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.FlushSuccessEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.regular.SchemaChangeRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.regular.SchemaChangeResponse; +import org.apache.flink.cdc.runtime.serializer.TableIdSerializer; +import org.apache.flink.core.memory.DataInputViewStreamWrapper; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.util.FlinkRuntimeException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.wrap; + +/** Coordinator node for {@link SchemaOperator}. Registry actor in Operator-Coordinator Topology. */ +public class SchemaCoordinator extends SchemaRegistry { + private static final Logger LOG = LoggerFactory.getLogger(SchemaCoordinator.class); + + /** Globally configured schema change behavior. */ + private final SchemaChangeBehavior behavior; + + /** Lock used for race controlling during schema change request handling. */ + private final Object schemaChangeRequestLock; + + /** Executor service to execute schema change. */ + private final ExecutorService schemaChangeThreadPool; + + /** + * Atomic flag indicating if current RequestHandler could accept more schema changes for now. + */ + private volatile RequestStatus schemaChangeStatus; + + /** Sink writers which have sent flush success events for the request. */ + private volatile ConcurrentHashMap> flushedSinkWriters; + + /** Currently handling request's completable future. 
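+ * Only one schema change request is handled at a time (guarded by {@code schemaChangeRequestLock}); this future is completed once the deduced schema changes have been applied, or completed exceptionally on unrecoverable errors.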
*/ + private volatile CompletableFuture pendingResponseFuture; + + // Static fields + public SchemaCoordinator( + String operatorName, + OperatorCoordinator.Context context, + ExecutorService coordinatorExecutor, + MetadataApplier metadataApplier, + List routes, + SchemaChangeBehavior schemaChangeBehavior, + Duration rpcTimeout) { + super(context, operatorName, coordinatorExecutor, metadataApplier, routes, rpcTimeout); + this.behavior = schemaChangeBehavior; + this.schemaChangeRequestLock = new Object(); + this.schemaChangeThreadPool = Executors.newSingleThreadExecutor(); + } + + @Override + public void start() throws Exception { + super.start(); + this.flushedSinkWriters = new ConcurrentHashMap<>(); + this.schemaChangeStatus = RequestStatus.IDLE; + } + + @Override + public void close() throws Exception { + super.close(); + if (schemaChangeThreadPool != null && !schemaChangeThreadPool.isShutdown()) { + schemaChangeThreadPool.shutdownNow(); + } + } + + @Override + protected void snapshot(CompletableFuture resultFuture) throws Exception { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos)) { + // Serialize SchemaManager + int schemaManagerSerializerVersion = SchemaManager.SERIALIZER.getVersion(); + out.writeInt(schemaManagerSerializerVersion); + byte[] serializedSchemaManager = SchemaManager.SERIALIZER.serialize(schemaManager); + out.writeInt(serializedSchemaManager.length); + out.write(serializedSchemaManager); + + // Length-bit for SchemaDerivation, which is no longer necessary. + out.writeInt(0); + resultFuture.complete(baos.toByteArray()); + } + } + + @Override + protected void restore(byte[] checkpointData) throws Exception { + try (ByteArrayInputStream bais = new ByteArrayInputStream(checkpointData); + DataInputStream in = new DataInputStream(bais)) { + int schemaManagerSerializerVersion = in.readInt(); + + switch (schemaManagerSerializerVersion) { + case 0: + { + int length = in.readInt(); + byte[] serializedSchemaManager = new byte[length]; + in.readFully(serializedSchemaManager); + schemaManager = + SchemaManager.SERIALIZER.deserialize( + schemaManagerSerializerVersion, serializedSchemaManager); + break; + } + case 1: + case 2: + { + int length = in.readInt(); + byte[] serializedSchemaManager = new byte[length]; + in.readFully(serializedSchemaManager); + schemaManager = + SchemaManager.SERIALIZER.deserialize( + schemaManagerSerializerVersion, serializedSchemaManager); + consumeUnusedSchemaDerivationBytes(in); + break; + } + default: + throw new IOException( + "Unrecognized serialization version " + schemaManagerSerializerVersion); + } + } + } + + @Override + protected void handleCustomCoordinationRequest( + CoordinationRequest request, CompletableFuture responseFuture) { + if (request instanceof SchemaChangeRequest) { + handleSchemaChangeRequest((SchemaChangeRequest) request, responseFuture); + } else { + throw new UnsupportedOperationException( + "Unknown coordination request type: " + request); + } + } + + @Override + protected void handleFlushSuccessEvent(FlushSuccessEvent event) { + int sinkSubtask = event.getSubtask(); + long nonce = event.getNonce(); + LOG.info("Sink subtask {} succeed flushing with nonce {}.", sinkSubtask, nonce); + synchronized (schemaChangeRequestLock) { + if (!flushedSinkWriters.containsKey(nonce)) { + flushedSinkWriters.put(nonce, ConcurrentHashMap.newKeySet()); + } + flushedSinkWriters.get(nonce).add(sinkSubtask); + LOG.info( + "Currently flushed sink writers for nonce {} are: {}", + 
nonce, + flushedSinkWriters.get(nonce)); + } + } + + @Override + protected void handleUnrecoverableError(String taskDescription, Throwable t) { + super.handleUnrecoverableError(taskDescription, t); + + // There's a pending future, release it exceptionally before quitting + if (pendingResponseFuture != null) { + pendingResponseFuture.completeExceptionally(t); + } + } + + /** + * Handle the {@link SchemaChangeRequest} and wait for all sink subtasks flushing. + * + * @param request the received SchemaChangeRequest + */ + public void handleSchemaChangeRequest( + SchemaChangeRequest request, CompletableFuture responseFuture) { + + // We use nonce to identify each schema change request + long nonce = request.getNonce(); + + synchronized (schemaChangeRequestLock) { + if (schemaChangeStatus == RequestStatus.IDLE) { + if (activeSinkWriters.size() < currentParallelism) { + LOG.info( + "Not all active sink writers have been registered. Current {}, expected {}.", + activeSinkWriters.size(), + currentParallelism); + responseFuture.complete(wrap(SchemaChangeResponse.waitingForFlush())); + return; + } + + if (!activeSinkWriters.equals(flushedSinkWriters.get(nonce))) { + LOG.info( + "Not all active sink writers have completed flush (nonce: {}). Flushed writers: {}, expected: {}.", + nonce, + flushedSinkWriters.get(nonce), + activeSinkWriters); + responseFuture.complete(wrap(SchemaChangeResponse.waitingForFlush())); + return; + } + + LOG.info( + "All sink writers have flushed for nonce {}. Switching to APPLYING state and starting schema evolution...", + nonce); + flushedSinkWriters.remove(nonce); + schemaChangeStatus = RequestStatus.APPLYING; + pendingResponseFuture = responseFuture; + startSchemaChangesEvolve(request, responseFuture); + } else { + responseFuture.complete(wrap(SchemaChangeResponse.busy())); + } + } + } + + private void startSchemaChangesEvolve( + SchemaChangeRequest request, CompletableFuture responseFuture) { + SchemaChangeEvent originalEvent = request.getSchemaChangeEvent(); + TableId originalTableId = originalEvent.tableId(); + Schema currentUpstreamSchema = + schemaManager.getLatestOriginalSchema(originalTableId).orElse(null); + + List deducedSchemaChangeEvents = new ArrayList<>(); + + // For redundant schema change events (possibly coming from duplicate emitted + // CreateTableEvents in snapshot stage), we just skip them. + if (!SchemaUtils.isSchemaChangeEventRedundant(currentUpstreamSchema, originalEvent)) { + schemaManager.applyOriginalSchemaChange(originalEvent); + deducedSchemaChangeEvents.addAll(deduceEvolvedSchemaChanges(originalEvent)); + } else { + LOG.info( + "Schema change event {} is redundant for current schema {}, just skip it.", + originalEvent, + currentUpstreamSchema); + } + + LOG.info( + "All sink subtask have flushed for table {}. 
Start to apply schema change request: \n\t{}\nthat extracts to:\n\t{}", + request.getTableId().toString(), + request, + deducedSchemaChangeEvents.stream() + .map(SchemaChangeEvent::toString) + .collect(Collectors.joining("\n\t"))); + schemaChangeThreadPool.submit( + () -> { + try { + applySchemaChange(originalEvent, deducedSchemaChangeEvents); + } catch (Throwable t) { + failJob( + "Schema change applying task", + new FlinkRuntimeException( + "Failed to apply schema change event.", t)); + throw t; + } + }); + } + + private List deduceEvolvedSchemaChanges(SchemaChangeEvent event) { + TableId originalTableId = event.tableId(); + + List deducedSchemaChangeEvents = new ArrayList<>(); + + Set originalTables = schemaManager.getAllOriginalTables(); + + // First, grab all affected evolved tables. + Set affectedEvolvedTables = + SchemaDerivator.getAffectedEvolvedTables( + router, Collections.singleton(originalTableId)); + + // For each affected table, we need to... + for (TableId evolvedTableId : affectedEvolvedTables) { + Schema currentEvolvedSchema = + schemaManager.getLatestEvolvedSchema(evolvedTableId).orElse(null); + + // ... reversely look up this affected sink table's upstream dependency + Set upstreamDependencies = + SchemaDerivator.reverseLookupDependingUpstreamTables( + router, evolvedTableId, originalTables); + Preconditions.checkArgument( + !upstreamDependencies.isEmpty(), + "An affected sink table's upstream dependency cannot be empty."); + + List rawSchemaChangeEvents = new ArrayList<>(); + if (upstreamDependencies.size() == 1) { + // If it's a one-by-one routing rule, we can simply forward it to downstream sink. + rawSchemaChangeEvents.add(event.copy(evolvedTableId)); + } else { + Set toBeMergedSchemas = + SchemaDerivator.reverseLookupDependingUpstreamSchemas( + router, evolvedTableId, schemaManager); + + // We're in a table routing mode now, so we need to infer a widest schema for all + // upstream tables. + Schema mergedSchema = currentEvolvedSchema; + for (Schema toBeMergedSchema : toBeMergedSchemas) { + mergedSchema = + SchemaMergingUtils.getLeastCommonSchema(mergedSchema, toBeMergedSchema); + } + + // Detect what schema changes we need to apply to get expected sink table. + rawSchemaChangeEvents.addAll( + SchemaMergingUtils.getSchemaDifference( + evolvedTableId, currentEvolvedSchema, mergedSchema)); + } + + // Finally, we normalize schema change events, including rewriting events by current + // schema change behavior configuration, dropping explicitly excluded schema change + // event types. + deducedSchemaChangeEvents.addAll( + SchemaDerivator.normalizeSchemaChangeEvents( + currentEvolvedSchema, + rawSchemaChangeEvents, + behavior, + metadataApplier)); + } + + return deducedSchemaChangeEvents; + } + + /** Applies the schema change to the external system. */ + private void applySchemaChange( + SchemaChangeEvent originalEvent, List deducedSchemaChangeEvents) { + if (SchemaChangeBehavior.EXCEPTION.equals(behavior)) { + if (deducedSchemaChangeEvents.stream() + .anyMatch(evt -> !(evt instanceof CreateTableEvent))) { + SchemaChangeEvent unacceptableSchemaChangeEvent = + deducedSchemaChangeEvents.stream() + .filter(evt -> !(evt instanceof CreateTableEvent)) + .findAny() + .get(); + throw new SchemaEvolveException( + unacceptableSchemaChangeEvent, + "Unexpected schema change events occurred in EXCEPTION mode. 
Job will fail now."); + } + } + + // Tries to apply it to external system + List appliedSchemaChangeEvents = new ArrayList<>(); + for (SchemaChangeEvent event : deducedSchemaChangeEvents) { + if (applyAndUpdateEvolvedSchemaChange(event)) { + appliedSchemaChangeEvents.add(event); + } + } + + Map refreshedEvolvedSchemas = new HashMap<>(); + + // We need to retrieve all possibly modified evolved schemas and refresh SchemaOperator's + // local cache since it might have been altered by another SchemaOperator instance. + // SchemaChangeEvents doesn't need to be emitted to downstream (since it might be broadcast + // from other SchemaOperators) though. + for (TableId tableId : router.route(originalEvent.tableId())) { + refreshedEvolvedSchemas.put( + tableId, schemaManager.getLatestEvolvedSchema(tableId).orElse(null)); + } + + // And returns all successfully applied schema change events to SchemaOperator. + pendingResponseFuture.complete( + wrap( + SchemaChangeResponse.success( + appliedSchemaChangeEvents, refreshedEvolvedSchemas))); + pendingResponseFuture = null; + + Preconditions.checkState( + schemaChangeStatus == RequestStatus.APPLYING, + "Illegal schemaChangeStatus state: should be APPLYING before applySchemaChange finishes, not " + + schemaChangeStatus); + schemaChangeStatus = RequestStatus.IDLE; + LOG.info("SchemaChangeStatus switched from APPLYING to IDLE."); + } + + private boolean applyAndUpdateEvolvedSchemaChange(SchemaChangeEvent schemaChangeEvent) { + try { + metadataApplier.applySchemaChange(schemaChangeEvent); + schemaManager.applyEvolvedSchemaChange(schemaChangeEvent); + LOG.info( + "Successfully applied schema change event {} to external system.", + schemaChangeEvent); + return true; + } catch (Throwable t) { + if (shouldIgnoreException(t)) { + LOG.warn( + "Failed to apply schema change {}, but keeps running in tolerant mode. Caused by: {}", + schemaChangeEvent, + t); + return false; + } else { + throw t; + } + } + } + + // ------------------------- + // Utilities + // ------------------------- + + private boolean shouldIgnoreException(Throwable throwable) { + // In IGNORE mode, will never try to apply schema change events + // In EVOLVE and LENIENT mode, such failure will not be tolerated + // In EXCEPTION mode, an exception will be thrown once captured + return (throwable instanceof UnsupportedSchemaChangeEventException) + && (SchemaChangeBehavior.TRY_EVOLVE.equals(behavior)); + } + + /** + * {@code IDLE}: Initial idling state, ready for requests.
+ * {@code APPLYING}: When schema change application finishes (successfully or with exceptions) + */ + private enum RequestStatus { + IDLE, + APPLYING + } + + /** + * Before Flink CDC 3.3, we store routing rules into {@link SchemaCoordinator}'s state, which + * turns out to be unnecessary since data stream topology might change after stateful restarts, + * and stale routing status is both unnecessary and erroneous. This function consumes these + * bytes from the state, but never returns them. + */ + private void consumeUnusedSchemaDerivationBytes(DataInputStream in) throws IOException { + TableIdSerializer tableIdSerializer = TableIdSerializer.INSTANCE; + int derivationMappingSize = in.readInt(); + Map> derivationMapping = new HashMap<>(derivationMappingSize); + for (int i = 0; i < derivationMappingSize; i++) { + // Routed table ID + TableId routedTableId = + tableIdSerializer.deserialize(new DataInputViewStreamWrapper(in)); + // Original table IDs + int numOriginalTables = in.readInt(); + Set originalTableIds = new HashSet<>(numOriginalTables); + for (int j = 0; j < numOriginalTables; j++) { + TableId originalTableId = + tableIdSerializer.deserialize(new DataInputViewStreamWrapper(in)); + originalTableIds.add(originalTableId); + } + derivationMapping.put(routedTableId, originalTableIds); + } + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryProvider.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinatorProvider.java similarity index 56% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryProvider.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinatorProvider.java index bc261e40f53..253b52cac06 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaRegistryProvider.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaCoordinatorProvider.java @@ -15,24 +15,24 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.coordinator; +package org.apache.flink.cdc.runtime.operators.schema.regular; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.route.RouteRule; import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.runtime.operators.schema.common.CoordinatorExecutorThreadFactory; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; -import org.apache.flink.util.FatalExitExceptionHandler; +import java.time.Duration; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; -/** Provider of {@link SchemaRegistry}. */ +/** Provider of {@link SchemaCoordinator}. 
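+ * Compared with the former {@code SchemaRegistryProvider}, it additionally threads the configurable RPC timeout through to the coordinator.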
*/ @Internal -public class SchemaRegistryProvider implements OperatorCoordinator.Provider { +public class SchemaCoordinatorProvider implements OperatorCoordinator.Provider { private static final long serialVersionUID = 1L; private final OperatorID operatorID; @@ -40,18 +40,21 @@ public class SchemaRegistryProvider implements OperatorCoordinator.Provider { private final MetadataApplier metadataApplier; private final List routingRules; private final SchemaChangeBehavior schemaChangeBehavior; + private final Duration rpcTimeout; - public SchemaRegistryProvider( + public SchemaCoordinatorProvider( OperatorID operatorID, String operatorName, MetadataApplier metadataApplier, List routingRules, - SchemaChangeBehavior schemaChangeBehavior) { + SchemaChangeBehavior schemaChangeBehavior, + Duration rpcTimeout) { this.operatorID = operatorID; this.operatorName = operatorName; this.metadataApplier = metadataApplier; this.routingRules = routingRules; this.schemaChangeBehavior = schemaChangeBehavior; + this.rpcTimeout = rpcTimeout; } @Override @@ -66,50 +69,13 @@ public OperatorCoordinator create(OperatorCoordinator.Context context) throws Ex "schema-evolution-coordinator", context.getUserCodeClassloader()); ExecutorService coordinatorExecutor = Executors.newSingleThreadExecutor(coordinatorThreadFactory); - return new SchemaRegistry( + return new SchemaCoordinator( operatorName, context, coordinatorExecutor, metadataApplier, routingRules, - schemaChangeBehavior); - } - - /** A thread factory class that provides some helper methods. */ - public static class CoordinatorExecutorThreadFactory implements ThreadFactory { - - private final String coordinatorThreadName; - private final ClassLoader cl; - private final Thread.UncaughtExceptionHandler errorHandler; - - private Thread t; - - CoordinatorExecutorThreadFactory( - final String coordinatorThreadName, final ClassLoader contextClassLoader) { - this(coordinatorThreadName, contextClassLoader, FatalExitExceptionHandler.INSTANCE); - } - - CoordinatorExecutorThreadFactory( - final String coordinatorThreadName, - final ClassLoader contextClassLoader, - final Thread.UncaughtExceptionHandler errorHandler) { - this.coordinatorThreadName = coordinatorThreadName; - this.cl = contextClassLoader; - this.errorHandler = errorHandler; - } - - @Override - public synchronized Thread newThread(Runnable r) { - if (t != null) { - throw new Error( - "This indicates that a fatal error has happened and caused the " - + "coordinator executor thread to exit. Check the earlier logs" - + "to see the root cause of the problem."); - } - t = new Thread(r, coordinatorThreadName); - t.setContextClassLoader(cl); - t.setUncaughtExceptionHandler(errorHandler); - return t; - } + schemaChangeBehavior, + rpcTimeout); } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperator.java new file mode 100644 index 00000000000..96322776f02 --- /dev/null +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperator.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.regular; + +import org.apache.flink.cdc.common.annotation.Internal; +import org.apache.flink.cdc.common.annotation.VisibleForTesting; +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.FlushEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaDerivator; +import org.apache.flink.cdc.runtime.operators.schema.common.TableIdRouter; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils; +import org.apache.flink.cdc.runtime.operators.schema.common.event.regular.SchemaChangeRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.regular.SchemaChangeResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.metrics.SchemaOperatorMetrics; +import org.apache.flink.cdc.runtime.typeutils.NonceUtils; +import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; +import org.apache.flink.runtime.operators.coordination.CoordinationRequest; +import org.apache.flink.runtime.operators.coordination.CoordinationResponse; +import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.ChainingStrategy; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.StreamTask; +import org.apache.flink.util.SerializedValue; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import static org.apache.flink.cdc.common.pipeline.PipelineOptions.DEFAULT_SCHEMA_OPERATOR_RPC_TIMEOUT; + +/** + * The operator will evolve schemas in {@link + * org.apache.flink.cdc.runtime.operators.schema.regular.SchemaCoordinator} for incoming {@link + * SchemaChangeEvent}s and block the stream for tables before their schema changes finish. 
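As a usage sketch of the class described above: a pipeline translator would normally not instantiate this operator directly but go through SchemaOperatorFactory (updated later in this patch), which also wires up the coordinator provider. The snippet below is hypothetical; stream, metadataApplier and routingRules are placeholders, and the timeout value is arbitrary.

    // Hypothetical wiring sketch, not taken from this patch.
    SchemaOperatorFactory factory =
            new SchemaOperatorFactory(
                    metadataApplier,             // MetadataApplier of the sink
                    routingRules,                // List<RouteRule> from the pipeline definition
                    Duration.ofSeconds(30),      // rpcTimeout
                    SchemaChangeBehavior.EVOLVE,
                    "UTC");                      // pipeline-local timezone
    SingleOutputStreamOperator<Event> evolved =
            stream.transform("SchemaOperator", new EventTypeInfo(), factory);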
+ */ +@Internal +public class SchemaOperator extends AbstractStreamOperator + implements OneInputStreamOperator, Serializable { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(SchemaOperator.class); + + // Final fields that are set in constructor + private final String timezone; + private final Duration rpcTimeout; + private final SchemaChangeBehavior schemaChangeBehavior; + private final List routingRules; + + // Transient fields that are set during open() + private transient int subTaskId; + private transient TaskOperatorEventGateway toCoordinator; + private transient SchemaOperatorMetrics schemaOperatorMetrics; + private transient volatile Map originalSchemaMap; + private transient volatile Map evolvedSchemaMap; + private transient TableIdRouter router; + + @VisibleForTesting + public SchemaOperator(List routingRules) { + this(routingRules, DEFAULT_SCHEMA_OPERATOR_RPC_TIMEOUT); + } + + @VisibleForTesting + public SchemaOperator(List routingRules, Duration rpcTimeOut) { + this(routingRules, rpcTimeOut, SchemaChangeBehavior.EVOLVE); + } + + @VisibleForTesting + public SchemaOperator( + List routingRules, + Duration rpcTimeOut, + SchemaChangeBehavior schemaChangeBehavior) { + this(routingRules, rpcTimeOut, schemaChangeBehavior, "UTC"); + } + + public SchemaOperator( + List routingRules, + Duration rpcTimeOut, + SchemaChangeBehavior schemaChangeBehavior, + String timezone) { + this.chainingStrategy = ChainingStrategy.ALWAYS; + this.rpcTimeout = rpcTimeOut; + this.schemaChangeBehavior = schemaChangeBehavior; + this.timezone = timezone; + this.routingRules = routingRules; + } + + @Override + public void setup( + StreamTask containingTask, + StreamConfig config, + Output> output) { + super.setup(containingTask, config, output); + this.toCoordinator = containingTask.getEnvironment().getOperatorCoordinatorEventGateway(); + } + + @Override + public void open() throws Exception { + super.open(); + this.schemaOperatorMetrics = + new SchemaOperatorMetrics( + getRuntimeContext().getMetricGroup(), schemaChangeBehavior); + this.subTaskId = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + this.originalSchemaMap = new HashMap<>(); + this.evolvedSchemaMap = new HashMap<>(); + this.router = new TableIdRouter(routingRules); + } + + /** + * This method is guaranteed to not be called concurrently with other methods of the operator. 
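The single-threaded guarantee above matters because processElement drives a blocking flush handshake: the operator emits a FlushEvent, every sink writer flushes its pending records and acknowledges the nonce, and only then does the coordinator apply and release the schema change. Roughly, the sink-side half looks like the sketch below (illustrative variable names; the real call sites live in DataSinkWriterOperator via the SchemaEvolutionClient touched later in this patch).

    // Sink-side acknowledgement sketch.
    SchemaEvolutionClient client = new SchemaEvolutionClient(toCoordinator, schemaOperatorID);
    client.registerSubtask(subTaskId);           // once, when the writer subtask starts

    // ... after receiving FlushEvent(tableId, nonce) and flushing buffered records:
    client.notifyFlushSuccess(subTaskId, tableId, nonce);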
+ */ + @Override + public void processElement(StreamRecord<Event> streamRecord) throws Exception { + Event event = streamRecord.getValue(); + if (event instanceof SchemaChangeEvent) { + handleSchemaChangeEvent((SchemaChangeEvent) event); + } else if (event instanceof DataChangeEvent) { + handleDataChangeEvent((DataChangeEvent) event); + } else { + throw new RuntimeException("Unknown event type in Stream record: " + event); + } + } + + private void handleSchemaChangeEvent(SchemaChangeEvent originalEvent) throws Exception { + // First, update the original schema map unconditionally; this step never fails + TableId tableId = originalEvent.tableId(); + originalSchemaMap.compute( + tableId, + (tId, schema) -> SchemaUtils.applySchemaChangeEvent(schema, originalEvent)); + schemaOperatorMetrics.increaseSchemaChangeEvents(1); + + // Next, send the FlushEvent before requesting the schema change, or downstream operators might stay blocked + long nonce = + NonceUtils.generateNonce(getCurrentTimestamp(), subTaskId, tableId, originalEvent); + LOG.info("{}> Sending the FlushEvent for table {} (nonce: {}).", subTaskId, tableId, nonce); + output.collect(new StreamRecord<>(FlushEvent.of(tableId, nonce))); + + // Then, request the schema change from the SchemaCoordinator. + SchemaChangeResponse response = requestSchemaChange(tableId, originalEvent, nonce); + + if (response.isSuccess()) { + LOG.info("{}> Successfully requested schema change.", subTaskId); + LOG.info( + "{}> Finished schema change events: {}", + subTaskId, + response.getAppliedSchemaChangeEvents()); + LOG.info("{}> Refreshed evolved schemas: {}", subTaskId, response.getEvolvedSchemas()); + + // After this request has been successfully applied to the external DBMS, we can... + List<SchemaChangeEvent> finishedSchemaChangeEvents = + response.getAppliedSchemaChangeEvents(); + + // ...update the local evolved schema cache... + evolvedSchemaMap.putAll(response.getEvolvedSchemas()); + + // ...and emit the finished events downstream + for (SchemaChangeEvent finishedEvent : finishedSchemaChangeEvents) { + output.collect(new StreamRecord<>(finishedEvent)); + } + + schemaOperatorMetrics.increaseFinishedSchemaChangeEvents( + finishedSchemaChangeEvents.size()); + } else if (response.isDuplicate()) { + LOG.info( + "{}> Schema change event {} has been handled in another subTask already.", + subTaskId, + originalEvent); + + schemaOperatorMetrics.increaseIgnoredSchemaChangeEvents(1); + } else if (response.isIgnored()) { + LOG.info( + "{}> Schema change event {} has been ignored. 
No schema evolution needed.", + subTaskId, + originalEvent); + + schemaOperatorMetrics.increaseIgnoredSchemaChangeEvents(1); + } else { + throw new IllegalStateException("Unexpected response status: " + response); + } + } + + private void handleDataChangeEvent(DataChangeEvent dataChangeEvent) { + TableId tableId = dataChangeEvent.tableId(); + + // First, we obtain the original schema corresponding to this data change event + Schema originalSchema = originalSchemaMap.get(dataChangeEvent.tableId()); + + // Then, for each routing terminus, coerce data records to the expected schema + for (TableId sinkTableId : router.route(tableId)) { + Schema evolvedSchema = evolvedSchemaMap.get(sinkTableId); + SchemaDerivator.coerceDataRecord( + timezone, + DataChangeEvent.route(dataChangeEvent, sinkTableId), + originalSchema, + evolvedSchema) + .ifPresent(evt -> output.collect(new StreamRecord<>(evt))); + } + } + + private SchemaChangeResponse requestSchemaChange( + TableId tableId, SchemaChangeEvent schemaChangeEvent, long nonce) + throws InterruptedException, TimeoutException { + long deadline = System.currentTimeMillis() + rpcTimeout.toMillis(); + while (true) { + SchemaChangeResponse response = + sendRequestToCoordinator( + new SchemaChangeRequest(tableId, schemaChangeEvent, subTaskId, nonce)); + if (System.currentTimeMillis() < deadline) { + if (response.isRegistryBusy()) { + LOG.info( + "{}> Schema Registry is busy now, waiting for next request...", + subTaskId); + Thread.sleep(1000); + } else if (response.isWaitingForFlush()) { + LOG.info( + "{}> Schema change event (with nonce {}) has not collected enough flush success events from writers, waiting...", + subTaskId, + nonce); + Thread.sleep(1000); + } else { + return response; + } + } else { + throw new TimeoutException("Timeout when requesting schema change."); + } + } + } + + private + RESPONSE sendRequestToCoordinator(REQUEST request) { + try { + CompletableFuture responseFuture = + toCoordinator.sendRequestToCoordinator( + getOperatorID(), new SerializedValue<>(request)); + return CoordinationResponseUtils.unwrap( + responseFuture.get(rpcTimeout.toMillis(), TimeUnit.MILLISECONDS)); + } catch (Exception e) { + throw new IllegalStateException( + "Failed to send request to coordinator: " + request.toString(), e); + } + } + + /** Visible for mocking in test cases. */ + @VisibleForTesting + protected int getCurrentTimestamp() { + return (int) Instant.now().getEpochSecond(); + } + + @VisibleForTesting + public void registerInitialSchema(TableId tableId, Schema schema) { + originalSchemaMap.put(tableId, schema); + evolvedSchemaMap.put(tableId, schema); + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorFactory.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorFactory.java similarity index 84% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorFactory.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorFactory.java index 367f6559775..630acc90463 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorFactory.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorFactory.java @@ -15,14 +15,13 @@ * limitations under the License. 
*/ -package org.apache.flink.cdc.runtime.operators.schema; +package org.apache.flink.cdc.runtime.operators.schema.regular; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.event.Event; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.route.RouteRule; import org.apache.flink.cdc.common.sink.MetadataApplier; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistryProvider; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; import org.apache.flink.streaming.api.operators.CoordinatedOperatorFactory; @@ -42,23 +41,30 @@ public class SchemaOperatorFactory extends SimpleOperatorFactory private final MetadataApplier metadataApplier; private final List routingRules; private final SchemaChangeBehavior schemaChangeBehavior; + private final Duration rpcTimeout; public SchemaOperatorFactory( MetadataApplier metadataApplier, List routingRules, - Duration rpcTimeOut, + Duration rpcTimeout, SchemaChangeBehavior schemaChangeBehavior, String timezone) { - super(new SchemaOperator(routingRules, rpcTimeOut, schemaChangeBehavior, timezone)); + super(new SchemaOperator(routingRules, rpcTimeout, schemaChangeBehavior, timezone)); this.metadataApplier = metadataApplier; this.routingRules = routingRules; this.schemaChangeBehavior = schemaChangeBehavior; + this.rpcTimeout = rpcTimeout; } @Override public OperatorCoordinator.Provider getCoordinatorProvider( String operatorName, OperatorID operatorID) { - return new SchemaRegistryProvider( - operatorID, operatorName, metadataApplier, routingRules, schemaChangeBehavior); + return new SchemaCoordinatorProvider( + operatorID, + operatorName, + metadataApplier, + routingRules, + schemaChangeBehavior, + rpcTimeout); } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/sink/SchemaEvolutionClient.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/sink/SchemaEvolutionClient.java index 0e7df4a2446..fe2acbe0d4c 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/sink/SchemaEvolutionClient.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/sink/SchemaEvolutionClient.java @@ -19,14 +19,13 @@ import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.cdc.runtime.operators.schema.event.FlushSuccessEvent; -import org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.FlushSuccessEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaRequest; +import 
org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; import org.apache.flink.util.SerializedValue; @@ -34,11 +33,11 @@ import java.io.IOException; import java.util.Optional; -import static org.apache.flink.cdc.runtime.operators.schema.event.CoordinationResponseUtils.unwrap; +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.unwrap; /** - * Client for {@link DataSinkWriterOperator} interact with {@link SchemaRegistry} when table schema - * evolution happened. + * Client for {@link DataSinkWriterOperator} interact with Schema Registry (Could be distributed or + * regular) when table schema evolution happened. */ public class SchemaEvolutionClient { @@ -53,17 +52,17 @@ public SchemaEvolutionClient( this.schemaOperatorID = schemaOperatorID; } - /** send {@link SinkWriterRegisterEvent} to {@link SchemaRegistry}. */ + /** send {@link SinkWriterRegisterEvent} to Schema Registry. */ public void registerSubtask(int subtask) throws IOException { toCoordinator.sendOperatorEventToCoordinator( schemaOperatorID, new SerializedValue<>(new SinkWriterRegisterEvent(subtask))); } - /** send {@link FlushSuccessEvent} to {@link SchemaRegistry}. */ + /** send {@link FlushSuccessEvent} to Schema Registry. */ public void notifyFlushSuccess(int subtask, TableId tableId, long nonce) throws IOException { toCoordinator.sendOperatorEventToCoordinator( schemaOperatorID, - new SerializedValue<>(new FlushSuccessEvent(subtask, tableId, nonce))); + new SerializedValue<>(FlushSuccessEvent.of(subtask, tableId, nonce))); } public Optional getLatestEvolvedSchema(TableId tableId) throws Exception { diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperator.java index dc6f07751ff..d354a62bbf4 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperator.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperator.java @@ -32,6 +32,7 @@ import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.schema.Selectors; import org.apache.flink.cdc.common.udf.UserDefinedFunctionContext; +import org.apache.flink.cdc.common.utils.SchemaMergingUtils; import org.apache.flink.cdc.common.utils.SchemaUtils; import org.apache.flink.cdc.runtime.parser.TransformParser; import org.apache.flink.streaming.api.graph.StreamConfig; @@ -358,7 +359,7 @@ private Schema transformSchema(TableId tableId, Schema schema) throws Exception return schema; } - return SchemaUtils.inferWiderSchema(newSchemas); + return SchemaMergingUtils.getCommonSchema(newSchemas); } private List getUdfFunctionInstances() { diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/DistributedPrePartitionOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/DistributedPrePartitionOperator.java new file mode 100644 index 00000000000..d3201248210 --- /dev/null +++ 
b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/DistributedPrePartitionOperator.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.partitioning; + +import org.apache.flink.cdc.common.annotation.Internal; +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.function.HashFunction; +import org.apache.flink.cdc.common.function.HashFunctionProvider; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.utils.SchemaUtils; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; +import org.apache.flink.cdc.runtime.serializer.event.EventSerializer; +import org.apache.flink.runtime.state.StateSnapshotContext; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.ChainingStrategy; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +/** + * Operator for processing events from {@link SchemaOperator} before {@link EventPartitioner} with + * reducible topology. + */ +@Internal +public class DistributedPrePartitionOperator extends AbstractStreamOperator + implements OneInputStreamOperator, Serializable { + private static final long serialVersionUID = 1L; + + private final int downstreamParallelism; + private final HashFunctionProvider hashFunctionProvider; + + // Schema and HashFunctionMap used in schema inferencing mode. 
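The two transient maps declared just below cache, per table, the latest known schema and the hash function derived from it. The provider contract they rely on is small; a minimal sketch that hashes by table ID only (a real provider would typically also mix in primary-key fields from the given schema) could look like this, judging from the call sites in this operator and assuming HashFunction stays a single-method interface.

    // Minimal HashFunctionProvider sketch; not the default implementation shipped with Flink CDC.
    HashFunctionProvider<DataChangeEvent> byTableIdOnly =
            new HashFunctionProvider<DataChangeEvent>() {
                @Override
                public HashFunction<DataChangeEvent> getHashFunction(TableId tableId, Schema schema) {
                    return event -> Objects.hashCode(event.tableId());
                }
            };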
+ private transient Map schemaMap; + private transient Map> hashFunctionMap; + + private transient int subTaskId; + + public DistributedPrePartitionOperator( + int downstreamParallelism, HashFunctionProvider hashFunctionProvider) { + this.chainingStrategy = ChainingStrategy.ALWAYS; + this.downstreamParallelism = downstreamParallelism; + this.hashFunctionProvider = hashFunctionProvider; + } + + @Override + public void open() throws Exception { + super.open(); + subTaskId = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + schemaMap = new HashMap<>(); + hashFunctionMap = new HashMap<>(); + } + + @Override + public void processElement(StreamRecord element) throws Exception { + Event event = element.getValue(); + if (event instanceof SchemaChangeEvent) { + SchemaChangeEvent schemaChangeEvent = (SchemaChangeEvent) event; + TableId tableId = schemaChangeEvent.tableId(); + + // Update schema map + schemaMap.compute( + tableId, + (tId, oldSchema) -> + SchemaUtils.applySchemaChangeEvent(oldSchema, schemaChangeEvent)); + + // For malformed dangling dropTableEvents, we simply ignore this event to avoid breaking + // the pipeline. + if (schemaMap.get(tableId) == null) { + return; + } + + // Update hash function + hashFunctionMap.put(tableId, recreateHashFunction(tableId)); + + // Broadcast SchemaChangeEvent + broadcastEvent(event); + } else if (event instanceof DataChangeEvent) { + // Partition DataChangeEvent by table ID and primary keys + partitionBy((DataChangeEvent) event); + } else { + throw new IllegalStateException( + subTaskId + "> PrePartition operator received an unexpected event: " + event); + } + } + + private void partitionBy(DataChangeEvent dataChangeEvent) { + output.collect( + new StreamRecord<>( + PartitioningEvent.ofReducible( + dataChangeEvent, + subTaskId, + hashFunctionMap + .get(dataChangeEvent.tableId()) + .hashcode(dataChangeEvent) + % downstreamParallelism))); + } + + private void broadcastEvent(Event toBroadcast) { + for (int i = 0; i < downstreamParallelism; i++) { + // Deep-copying each event is required since downstream subTasks might run in the same + // JVM + Event copiedEvent = EventSerializer.INSTANCE.copy(toBroadcast); + output.collect( + new StreamRecord<>(PartitioningEvent.ofReducible(copiedEvent, subTaskId, i))); + } + } + + private HashFunction recreateHashFunction(TableId tableId) { + return hashFunctionProvider.getHashFunction(tableId, schemaMap.get(tableId)); + } + + @Override + public void snapshotState(StateSnapshotContext context) throws Exception { + // Needless to do anything, since AbstractStreamOperator#snapshotState and #processElement + // is guaranteed not to be mixed together. + } +} diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PartitioningEvent.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PartitioningEvent.java index 076a843929b..b06c84d66e2 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PartitioningEvent.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PartitioningEvent.java @@ -29,10 +29,28 @@ @Internal public class PartitioningEvent implements Event { private final Event payload; + private final int sourcePartition; private final int targetPartition; - public PartitioningEvent(Event payload, int targetPartition) { + /** + * For partitioning events with regular topology, source partition information is not necessary. 
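A quick illustration of the two factory variants introduced here, with someEvent as a placeholder payload and arbitrary partition numbers; ofRegular simply records -1 as the source partition.

    // Regular topology: only the target partition matters.
    PartitioningEvent regular = PartitioningEvent.ofRegular(someEvent, 3);

    // Reducible (distributed) topology: the upstream subtask ID is kept so downstream
    // operators can tell apart events coming from different source partitions.
    PartitioningEvent reducible = PartitioningEvent.ofReducible(someEvent, /* source */ 1, /* target */ 3);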
+ */ + public static PartitioningEvent ofRegular(Event payload, int targetPartition) { + return new PartitioningEvent(payload, -1, targetPartition); + } + + /** + * For reducible events, we need to track its upstream source subTask ID to correctly + * distinguish events from different partitions. + */ + public static PartitioningEvent ofReducible( + Event payload, int sourcePartition, int targetPartition) { + return new PartitioningEvent(payload, sourcePartition, targetPartition); + } + + private PartitioningEvent(Event payload, int sourcePartition, int targetPartition) { this.payload = payload; + this.sourcePartition = sourcePartition; this.targetPartition = targetPartition; } @@ -40,6 +58,10 @@ public Event getPayload() { return payload; } + public int getSourcePartition() { + return sourcePartition; + } + public int getTargetPartition() { return targetPartition; } @@ -53,12 +75,14 @@ public boolean equals(Object o) { return false; } PartitioningEvent that = (PartitioningEvent) o; - return targetPartition == that.targetPartition && Objects.equals(payload, that.payload); + return sourcePartition == that.sourcePartition + && targetPartition == that.targetPartition + && Objects.equals(payload, that.payload); } @Override public int hashCode() { - return Objects.hash(payload, targetPartition); + return Objects.hash(payload, sourcePartition, targetPartition); } @Override @@ -66,6 +90,8 @@ public String toString() { return "PartitioningEvent{" + "payload=" + payload + + ", sourcePartition=" + + sourcePartition + ", targetPartition=" + targetPartition + '}'; diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperator.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/RegularPrePartitionOperator.java similarity index 93% rename from flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperator.java rename to flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/RegularPrePartitionOperator.java index 938e6950d38..38f177472fb 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperator.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/partitioning/RegularPrePartitionOperator.java @@ -26,7 +26,7 @@ import org.apache.flink.cdc.common.function.HashFunction; import org.apache.flink.cdc.common.function.HashFunctionProvider; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.runtime.operators.schema.SchemaOperator; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaOperator; import org.apache.flink.cdc.runtime.operators.sink.SchemaEvolutionClient; import org.apache.flink.cdc.runtime.serializer.event.EventSerializer; import org.apache.flink.runtime.jobgraph.OperatorID; @@ -45,9 +45,12 @@ import java.time.Duration; import java.util.Optional; -/** Operator for processing events from {@link SchemaOperator} before {@link EventPartitioner}. */ +/** + * Operator for processing events from {@link SchemaOperator} before {@link EventPartitioner} with + * regular topology. 
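Downstream of both pre-partition operators, the pre-computed target partition is honored when the stream is physically redistributed. Conceptually it boils down to the sketch below; this is not the actual EventPartitioner source, and prePartitioned is a placeholder DataStream<PartitioningEvent>.

    // Conceptual sketch of honoring PartitioningEvent#getTargetPartition() during redistribution.
    Partitioner<Integer> byTargetPartition = (key, numPartitions) -> key % numPartitions;
    DataStream<PartitioningEvent> redistributed =
            prePartitioned.partitionCustom(byTargetPartition, PartitioningEvent::getTargetPartition);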
+ */ @Internal -public class PrePartitionOperator extends AbstractStreamOperator +public class RegularPrePartitionOperator extends AbstractStreamOperator implements OneInputStreamOperator, Serializable { private static final long serialVersionUID = 1L; @@ -60,7 +63,7 @@ public class PrePartitionOperator extends AbstractStreamOperator> cachedHashFunctions; - public PrePartitionOperator( + public RegularPrePartitionOperator( OperatorID schemaOperatorId, int downstreamParallelism, HashFunctionProvider hashFunctionProvider) { @@ -100,7 +103,7 @@ public void processElement(StreamRecord element) throws Exception { private void partitionBy(DataChangeEvent dataChangeEvent) throws Exception { output.collect( new StreamRecord<>( - new PartitioningEvent( + PartitioningEvent.ofRegular( dataChangeEvent, cachedHashFunctions .get(dataChangeEvent.tableId()) @@ -113,7 +116,7 @@ private void broadcastEvent(Event toBroadcast) { // Deep-copying each event is required since downstream subTasks might run in the same // JVM Event copiedEvent = EventSerializer.INSTANCE.copy(toBroadcast); - output.collect(new StreamRecord<>(new PartitioningEvent(copiedEvent, i))); + output.collect(new StreamRecord<>(PartitioningEvent.ofRegular(copiedEvent, i))); } } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializer.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializer.java index 2c10ad73c49..17d2f59bd1a 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializer.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializer.java @@ -61,9 +61,13 @@ public Event createInstance() { @Override public Event copy(Event from) { if (from instanceof FlushEvent) { - return new FlushEvent( - tableIdSerializer.copy(((FlushEvent) from).getTableId()), - ((FlushEvent) from).getNonce()); + FlushEvent flushEvent = (FlushEvent) from; + if (flushEvent.getTableId() == null) { + return FlushEvent.ofAll(); + } else { + return FlushEvent.of( + tableIdSerializer.copy(flushEvent.getTableId()), (flushEvent.getNonce())); + } } else if (from instanceof SchemaChangeEvent) { return schemaChangeEventSerializer.copy((SchemaChangeEvent) from); } else if (from instanceof DataChangeEvent) { @@ -104,7 +108,7 @@ public Event deserialize(DataInputView source) throws IOException { EventClass eventClass = enumSerializer.deserialize(source); switch (eventClass) { case FLUSH_EVENT: - return new FlushEvent(tableIdSerializer.deserialize(source), source.readLong()); + return FlushEvent.of(tableIdSerializer.deserialize(source), source.readLong()); case DATA_CHANGE_EVENT: return dataChangeEventSerializer.deserialize(source); case SCHEME_CHANGE_EVENT: diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializer.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializer.java index 0cce884af99..a38476dcaad 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializer.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializer.java @@ -43,13 +43,15 @@ public boolean isImmutableType() { @Override public PartitioningEvent createInstance() { - return new PartitioningEvent(null, -1); + return PartitioningEvent.ofReducible(null, -1, -1); } @Override public PartitioningEvent 
copy(PartitioningEvent from) { - return new PartitioningEvent( - eventSerializer.copy(from.getPayload()), from.getTargetPartition()); + return PartitioningEvent.ofReducible( + eventSerializer.copy(from.getPayload()), + from.getSourcePartition(), + from.getTargetPartition()); } @Override @@ -65,14 +67,16 @@ public int getLength() { @Override public void serialize(PartitioningEvent record, DataOutputView target) throws IOException { eventSerializer.serialize(record.getPayload(), target); + target.writeInt(record.getSourcePartition()); target.writeInt(record.getTargetPartition()); } @Override public PartitioningEvent deserialize(DataInputView source) throws IOException { Event payload = eventSerializer.deserialize(source); + int sourcePartition = source.readInt(); int targetPartition = source.readInt(); - return new PartitioningEvent(payload, targetPartition); + return PartitioningEvent.ofReducible(payload, sourcePartition, targetPartition); } @Override diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/SchemaChangeEventSerializer.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/SchemaChangeEventSerializer.java index 4f556b5cad2..7a5da9c7231 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/SchemaChangeEventSerializer.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/serializer/event/SchemaChangeEventSerializer.java @@ -67,6 +67,11 @@ public SchemaChangeEventType getType() { public TableId tableId() { return TableId.tableId("unknown", "unknown", "unknown"); } + + @Override + public SchemaChangeEvent copy(TableId newTableId) { + return null; + } }; } diff --git a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/typeutils/NonceUtils.java b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/typeutils/NonceUtils.java index 0d9e0b74643..1e2b1290995 100644 --- a/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/typeutils/NonceUtils.java +++ b/flink-cdc-runtime/src/main/java/org/apache/flink/cdc/runtime/typeutils/NonceUtils.java @@ -46,7 +46,7 @@ public static long generateNonce( /** Generating a {@link FlushEvent} carrying a nonce. */ public static FlushEvent generateFlushEvent( int timestamp, int subTaskId, TableId tableId, Event schemaChangeEvent) { - return new FlushEvent( + return FlushEvent.of( tableId, generateNonce(timestamp, subTaskId, tableId, schemaChangeEvent)); } } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivatorTest.java new file mode 100644 index 00000000000..f921f024c36 --- /dev/null +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaDerivatorTest.java @@ -0,0 +1,602 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.DropColumnEvent; +import org.apache.flink.cdc.common.event.DropTableEvent; +import org.apache.flink.cdc.common.event.RenameColumnEvent; +import org.apache.flink.cdc.common.event.SchemaChangeEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.event.TruncateTableEvent; +import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.sink.MetadataApplier; +import org.apache.flink.cdc.common.types.DataTypes; + +import org.apache.flink.shaded.guava31.com.google.common.collect.HashBasedTable; +import org.apache.flink.shaded.guava31.com.google.common.collect.Table; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** Unit test for {@link SchemaDerivator}. */ +public class SchemaDerivatorTest extends SchemaTestBase { + private static final Set ALL_UPSTREAM_TABLE_IDS = + IntStream.rangeClosed(0, 5) + .boxed() + .flatMap( + dbIdx -> + IntStream.rangeClosed(1, 3) + .mapToObj( + tblIdx -> + TableId.tableId( + "db_" + dbIdx, + "table_" + tblIdx))) + .collect(Collectors.toSet()); + + private static final Table DUMMY_UPSTREAM_SCHEMA_TABLE = + HashBasedTable.create(); + + static { + ALL_UPSTREAM_TABLE_IDS.forEach( + tbl -> { + for (int i = 0; i < 2; i++) { + DUMMY_UPSTREAM_SCHEMA_TABLE.put( + tbl, + i, + Schema.newBuilder() + .physicalColumn( + "id", + DataTypes.INT(), + String.format("%s @ %s", tbl, i)) + .build()); + } + }); + } + + private static final TableId NORMALIZE_TEST_TABLE_ID = TableId.parse("foo.bar.baz"); + private static final Schema NORMALIZE_TEST_SCHEMA = + Schema.newBuilder() + .physicalColumn("id", DataTypes.INT()) + .physicalColumn("name", DataTypes.VARCHAR(128)) + .physicalColumn("age", DataTypes.FLOAT()) + .physicalColumn("notes", DataTypes.STRING()) + .build(); + private static final MetadataApplier MOCKED_METADATA_APPLIER = + schemaChangeEvent -> { + throw new UnsupportedOperationException( + "Mocked metadata applier doesn't really do anything."); + }; + + private static Set getAffected(String... 
tables) { + return SchemaDerivator.getAffectedEvolvedTables( + TABLE_ID_ROUTER, + Arrays.stream(tables).map(TableId::parse).collect(Collectors.toSet())) + .stream() + .map(TableId::toString) + .collect(Collectors.toSet()); + } + + private static Set reverseLookupTable(String tableId) { + return SchemaDerivator.reverseLookupDependingUpstreamTables( + TABLE_ID_ROUTER, TableId.parse(tableId), DUMMY_UPSTREAM_SCHEMA_TABLE) + .stream() + .map(TableId::toString) + .collect(Collectors.toSet()); + } + + private static Set reverseLookupSchema(String tableId) { + return SchemaDerivator.reverseLookupDependingUpstreamSchemas( + TABLE_ID_ROUTER, TableId.parse(tableId), DUMMY_UPSTREAM_SCHEMA_TABLE) + .stream() + .map(schema -> schema.getColumn("id").get().getComment()) + .collect(Collectors.toSet()); + } + + private static List normalizeEvent( + SchemaChangeBehavior behavior, SchemaChangeEvent... events) { + return SchemaDerivator.normalizeSchemaChangeEvents( + NORMALIZE_TEST_SCHEMA, + Arrays.stream(events).collect(Collectors.toList()), + behavior, + MOCKED_METADATA_APPLIER); + } + + @Test + void testGetAffectedEvolvedTables() { + assertThat(getAffected()).isEmpty(); + + // No routing rule, behaves like one-to-one routing + assertThat(getAffected("db_0.table_1")).containsExactlyInAnyOrder("db_0.table_1"); + assertThat(getAffected("db_0.table_2")).containsExactlyInAnyOrder("db_0.table_2"); + assertThat(getAffected("db_0.table_3")).containsExactlyInAnyOrder("db_0.table_3"); + assertThat(getAffected("db_0.table_1", "db_0.table_2")) + .containsExactlyInAnyOrder("db_0.table_1", "db_0.table_2"); + assertThat(getAffected("db_0.table_1", "db_0.table_3")) + .containsExactlyInAnyOrder("db_0.table_1", "db_0.table_3"); + assertThat(getAffected("db_0.table_2", "db_0.table_3")) + .containsExactlyInAnyOrder("db_0.table_2", "db_0.table_3"); + assertThat(getAffected("db_0.table_1", "db_0.table_2", "db_0.table_3")) + .containsExactlyInAnyOrder("db_0.table_1", "db_0.table_2", "db_0.table_3"); + + // One-to-one routing + assertThat(getAffected("db_1.table_1")).containsExactlyInAnyOrder("db_1.table_1"); + assertThat(getAffected("db_1.table_2")).containsExactlyInAnyOrder("db_1.table_2"); + assertThat(getAffected("db_1.table_3")).containsExactlyInAnyOrder("db_1.table_3"); + assertThat(getAffected("db_1.table_1", "db_1.table_2")) + .containsExactlyInAnyOrder("db_1.table_1", "db_1.table_2"); + assertThat(getAffected("db_1.table_1", "db_1.table_3")) + .containsExactlyInAnyOrder("db_1.table_1", "db_1.table_3"); + assertThat(getAffected("db_1.table_2", "db_1.table_3")) + .containsExactlyInAnyOrder("db_1.table_2", "db_1.table_3"); + assertThat(getAffected("db_1.table_1", "db_1.table_2", "db_1.table_3")) + .containsExactlyInAnyOrder("db_1.table_1", "db_1.table_2", "db_1.table_3"); + + // One-to-one routing, but twisted + assertThat(getAffected("db_2.table_1")).containsExactlyInAnyOrder("db_2.table_2"); + assertThat(getAffected("db_2.table_2")).containsExactlyInAnyOrder("db_2.table_3"); + assertThat(getAffected("db_2.table_3")).containsExactlyInAnyOrder("db_2.table_1"); + assertThat(getAffected("db_2.table_1", "db_2.table_2")) + .containsExactlyInAnyOrder("db_2.table_2", "db_2.table_3"); + assertThat(getAffected("db_2.table_1", "db_2.table_3")) + .containsExactlyInAnyOrder("db_2.table_2", "db_2.table_1"); + assertThat(getAffected("db_2.table_2", "db_2.table_3")) + .containsExactlyInAnyOrder("db_2.table_3", "db_2.table_1"); + assertThat(getAffected("db_2.table_1", "db_2.table_2", "db_2.table_3")) + 
.containsExactlyInAnyOrder("db_2.table_2", "db_2.table_3", "db_2.table_1"); + + // Merging-table routing + assertThat(getAffected("db_3.table_1")).containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_2")).containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_3")).containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_1", "db_3.table_2")) + .containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_1", "db_3.table_3")) + .containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_2", "db_3.table_3")) + .containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(getAffected("db_3.table_1", "db_3.table_2", "db_3.table_3")) + .containsExactlyInAnyOrder("db_3.table_merged"); + + // Broadcasting routing + assertThat(getAffected("db_4.table_1")) + .containsExactlyInAnyOrder("db_4.table_a", "db_4.table_b", "db_4.table_c"); + assertThat(getAffected("db_4.table_2")) + .containsExactlyInAnyOrder("db_4.table_b", "db_4.table_c"); + assertThat(getAffected("db_4.table_3")).containsExactlyInAnyOrder("db_4.table_c"); + assertThat(getAffected("db_4.table_1", "db_4.table_2")) + .containsExactlyInAnyOrder("db_4.table_a", "db_4.table_b", "db_4.table_c"); + assertThat(getAffected("db_4.table_1", "db_4.table_3")) + .containsExactlyInAnyOrder("db_4.table_a", "db_4.table_b", "db_4.table_c"); + assertThat(getAffected("db_4.table_2", "db_4.table_3")) + .containsExactlyInAnyOrder("db_4.table_b", "db_4.table_c"); + assertThat(getAffected("db_4.table_1", "db_4.table_2", "db_4.table_3")) + .containsExactlyInAnyOrder("db_4.table_a", "db_4.table_b", "db_4.table_c"); + + // RepSym routing + assertThat(getAffected("db_5.table_1", "db_5.table_2")) + .containsExactlyInAnyOrder( + "db_5.prefix_table_1_suffix", "db_5.prefix_table_2_suffix"); + assertThat(getAffected("db_5.table_1", "db_5.table_3")) + .containsExactlyInAnyOrder( + "db_5.prefix_table_1_suffix", "db_5.prefix_table_3_suffix"); + assertThat(getAffected("db_5.table_2", "db_5.table_3")) + .containsExactlyInAnyOrder( + "db_5.prefix_table_2_suffix", "db_5.prefix_table_3_suffix"); + assertThat(getAffected("db_5.table_1", "db_5.table_2", "db_5.table_3")) + .containsExactlyInAnyOrder( + "db_5.prefix_table_1_suffix", + "db_5.prefix_table_2_suffix", + "db_5.prefix_table_3_suffix"); + } + + @Test + void testReverseLookupDependingUpstreamTables() { + assertThat(reverseLookupTable("db_0.table_1")).containsExactlyInAnyOrder("db_0.table_1"); + assertThat(reverseLookupTable("db_0.table_2")).containsExactlyInAnyOrder("db_0.table_2"); + assertThat(reverseLookupTable("db_0.table_3")).containsExactlyInAnyOrder("db_0.table_3"); + + assertThat(reverseLookupTable("db_1.table_1")).containsExactlyInAnyOrder("db_1.table_1"); + assertThat(reverseLookupTable("db_1.table_2")).containsExactlyInAnyOrder("db_1.table_2"); + assertThat(reverseLookupTable("db_1.table_3")).containsExactlyInAnyOrder("db_1.table_3"); + + assertThat(reverseLookupTable("db_2.table_1")).containsExactlyInAnyOrder("db_2.table_3"); + assertThat(reverseLookupTable("db_2.table_2")).containsExactlyInAnyOrder("db_2.table_1"); + assertThat(reverseLookupTable("db_2.table_3")).containsExactlyInAnyOrder("db_2.table_2"); + + assertThat(reverseLookupTable("db_3.table_merged")) + .containsExactlyInAnyOrder("db_3.table_1", "db_3.table_2", "db_3.table_3"); + + assertThat(reverseLookupTable("db_4.table_a")).containsExactlyInAnyOrder("db_4.table_1"); + 
assertThat(reverseLookupTable("db_4.table_b")) + .containsExactlyInAnyOrder("db_4.table_1", "db_4.table_2"); + assertThat(reverseLookupTable("db_4.table_c")) + .containsExactlyInAnyOrder("db_4.table_1", "db_4.table_2", "db_4.table_3"); + + assertThat(reverseLookupTable("db_5.prefix_table_1_suffix")) + .containsExactlyInAnyOrder("db_5.table_1"); + assertThat(reverseLookupTable("db_5.prefix_table_2_suffix")) + .containsExactlyInAnyOrder("db_5.table_2"); + assertThat(reverseLookupTable("db_5.prefix_table_3_suffix")) + .containsExactlyInAnyOrder("db_5.table_3"); + } + + @Test + void testReverseLookupDependingUpstreamSchemas() { + assertThat(reverseLookupSchema("db_0.table_1")) + .containsExactlyInAnyOrder("db_0.table_1 @ 0", "db_0.table_1 @ 1"); + assertThat(reverseLookupSchema("db_0.table_2")) + .containsExactlyInAnyOrder("db_0.table_2 @ 0", "db_0.table_2 @ 1"); + assertThat(reverseLookupSchema("db_0.table_3")) + .containsExactlyInAnyOrder("db_0.table_3 @ 0", "db_0.table_3 @ 1"); + + assertThat(reverseLookupSchema("db_1.table_1")) + .containsExactlyInAnyOrder("db_1.table_1 @ 0", "db_1.table_1 @ 1"); + assertThat(reverseLookupSchema("db_1.table_2")) + .containsExactlyInAnyOrder("db_1.table_2 @ 0", "db_1.table_2 @ 1"); + assertThat(reverseLookupSchema("db_1.table_3")) + .containsExactlyInAnyOrder("db_1.table_3 @ 0", "db_1.table_3 @ 1"); + + assertThat(reverseLookupSchema("db_2.table_1")) + .containsExactlyInAnyOrder("db_2.table_3 @ 0", "db_2.table_3 @ 1"); + assertThat(reverseLookupSchema("db_2.table_2")) + .containsExactlyInAnyOrder("db_2.table_1 @ 0", "db_2.table_1 @ 1"); + assertThat(reverseLookupSchema("db_2.table_3")) + .containsExactlyInAnyOrder("db_2.table_2 @ 0", "db_2.table_2 @ 1"); + + assertThat(reverseLookupSchema("db_3.table_merged")) + .containsExactlyInAnyOrder( + "db_3.table_1 @ 0", + "db_3.table_1 @ 1", + "db_3.table_2 @ 0", + "db_3.table_2 @ 1", + "db_3.table_3 @ 0", + "db_3.table_3 @ 1"); + + assertThat(reverseLookupSchema("db_4.table_a")) + .containsExactlyInAnyOrder("db_4.table_1 @ 0", "db_4.table_1 @ 1"); + assertThat(reverseLookupSchema("db_4.table_b")) + .containsExactlyInAnyOrder( + "db_4.table_1 @ 0", + "db_4.table_1 @ 1", + "db_4.table_2 @ 0", + "db_4.table_2 @ 1"); + assertThat(reverseLookupSchema("db_4.table_c")) + .containsExactlyInAnyOrder( + "db_4.table_1 @ 0", + "db_4.table_1 @ 1", + "db_4.table_2 @ 0", + "db_4.table_2 @ 1", + "db_4.table_3 @ 0", + "db_4.table_3 @ 1"); + + assertThat(reverseLookupSchema("db_5.prefix_table_1_suffix")) + .containsExactlyInAnyOrder("db_5.table_1 @ 0", "db_5.table_1 @ 1"); + assertThat(reverseLookupSchema("db_5.prefix_table_2_suffix")) + .containsExactlyInAnyOrder("db_5.table_2 @ 0", "db_5.table_2 @ 1"); + assertThat(reverseLookupSchema("db_5.prefix_table_3_suffix")) + .containsExactlyInAnyOrder("db_5.table_3 @ 0", "db_5.table_3 @ 1"); + } + + @Test + void testNormalizeSchemaChangeEventsInEvolveMode() { + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new CreateTableEvent( + NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA))) + .containsExactly( + new CreateTableEvent(NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id"))))) + .containsExactly( + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new 
AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id")))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE())))) + .containsExactly( + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE()), + Collections.singletonMap("age", DataTypes.FLOAT()))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", "aging")))) + .containsExactly( + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, Collections.singletonMap("age", "aging"))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList("notes")))) + .containsExactly( + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, Collections.singletonList("notes"))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.EVOLVE, + new DropTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new DropTableEvent(NORMALIZE_TEST_TABLE_ID)); + } + + @Test + void testNormalizeSchemaChangeEventsInTryEvolveMode() { + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new CreateTableEvent( + NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA))) + .containsExactly( + new CreateTableEvent(NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id"))))) + .containsExactly( + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id")))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE())))) + .containsExactly( + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE()), + Collections.singletonMap("age", DataTypes.FLOAT()))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", "aging")))) + .containsExactly( + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, Collections.singletonMap("age", "aging"))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList("notes")))) + .containsExactly( + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, Collections.singletonList("notes"))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.TRY_EVOLVE, + new DropTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new 
DropTableEvent(NORMALIZE_TEST_TABLE_ID)); + } + + @Test + void testNormalizeSchemaChangeEventsInLenientMode() { + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new CreateTableEvent( + NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA))) + .containsExactly( + new CreateTableEvent(NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id"))))) + .containsExactly( + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.LAST, + null)))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE())))) + .containsExactly( + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE()), + Collections.singletonMap("age", DataTypes.FLOAT()))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", "aging")))) + .containsExactly( + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("aging", DataTypes.FLOAT()), + AddColumnEvent.ColumnPosition.LAST, + null)))); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList("notes")))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.LENIENT, + new DropTableEvent(NORMALIZE_TEST_TABLE_ID))) + .containsExactly(new DropTableEvent(NORMALIZE_TEST_TABLE_ID)); + } + + @Test + void testNormalizeSchemaChangeEventsInIgnoreMode() { + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new CreateTableEvent( + NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA))) + .containsExactly( + new CreateTableEvent(NORMALIZE_TEST_TABLE_ID, NORMALIZE_TEST_SCHEMA)); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new AddColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn( + "added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id"))))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new AlterColumnTypeEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE())))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new RenameColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonMap("age", "aging")))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new DropColumnEvent( + NORMALIZE_TEST_TABLE_ID, + Collections.singletonList("notes")))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + new TruncateTableEvent(NORMALIZE_TEST_TABLE_ID))) + .isEmpty(); + + assertThat( + normalizeEvent( + SchemaChangeBehavior.IGNORE, + 
new DropTableEvent(NORMALIZE_TEST_TABLE_ID))) + .isEmpty(); + } +} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManagerTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java similarity index 97% rename from flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManagerTest.java rename to flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java index 5adf50dcf34..743f38d3316 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaManagerTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.runtime.operators.schema.coordinator; +package org.apache.flink.cdc.runtime.operators.schema.common; import org.apache.flink.cdc.common.event.AddColumnEvent; import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; @@ -34,6 +34,7 @@ import java.util.List; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; /** Unit test for {@link SchemaManager}. */ @@ -65,15 +66,11 @@ void testHandlingCreateTableEvent() { .isPresent() .contains(CUSTOMERS_SCHEMA); - // Cannot apply CreateTableEvent multiple times - assertThatThrownBy( + assertThatCode( () -> schemaManager.applyEvolvedSchemaChange( new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA))) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Unable to apply CreateTableEvent to an existing schema for table \"%s\"", - CUSTOMERS); + .doesNotThrowAnyException(); } @Test diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaTestBase.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaTestBase.java new file mode 100644 index 00000000000..8be12fd95db --- /dev/null +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaTestBase.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.cdc.common.data.binary.BinaryRecordData; +import org.apache.flink.cdc.common.data.binary.BinaryStringData; +import org.apache.flink.cdc.common.event.DataChangeEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.route.RouteRule; +import org.apache.flink.cdc.common.types.DataType; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.runtime.partitioning.PartitioningEvent; +import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** Some common schema testing utilities & functions. */ +public abstract class SchemaTestBase { + protected static final List ROUTING_RULES = + Arrays.asList( + // Simple 1-to-1 routing rules + new RouteRule("db_1.table_1", "db_1.table_1"), + new RouteRule("db_1.table_2", "db_1.table_2"), + new RouteRule("db_1.table_3", "db_1.table_3"), + + // Re-routed rules + new RouteRule("db_2.table_1", "db_2.table_2"), + new RouteRule("db_2.table_2", "db_2.table_3"), + new RouteRule("db_2.table_3", "db_2.table_1"), + + // Merging tables + new RouteRule("db_3.table_\\.*", "db_3.table_merged"), + + // Broadcast tables + new RouteRule("db_4.table_1", "db_4.table_a"), + new RouteRule("db_4.table_1", "db_4.table_b"), + new RouteRule("db_4.table_1", "db_4.table_c"), + new RouteRule("db_4.table_2", "db_4.table_b"), + new RouteRule("db_4.table_2", "db_4.table_c"), + new RouteRule("db_4.table_3", "db_4.table_c"), + + // RepSym routes + new RouteRule("db_5.table_\\.*", "db_5.prefix_<>_suffix", "<>"), + + // Irrelevant routes + new RouteRule("foo", "bar", null)); + + protected static final TableIdRouter TABLE_ID_ROUTER = new TableIdRouter(ROUTING_RULES); + + protected static BinaryRecordData genBinRec(String rowType, Object... fields) { + return (new BinaryRecordDataGenerator(quickGenRow(rowType).toArray(new DataType[0]))) + .generate( + Arrays.stream(fields) + .map( + e -> + (e instanceof String) + ? BinaryStringData.fromString((String) e) + : e) + .toArray()); + } + + protected static List quickGenRow(String crypticExpr) { + List rowTypes = new ArrayList<>(crypticExpr.length()); + for (char c : crypticExpr.toLowerCase().toCharArray()) { + switch (c) { + case 'b': + rowTypes.add(DataTypes.BOOLEAN()); + break; + case 'i': + rowTypes.add(DataTypes.INT()); + break; + case 'l': + rowTypes.add(DataTypes.BIGINT()); + break; + case 'f': + rowTypes.add(DataTypes.FLOAT()); + break; + case 'd': + rowTypes.add(DataTypes.DOUBLE()); + break; + case 's': + rowTypes.add(DataTypes.STRING()); + break; + default: + throw new IllegalStateException("Unexpected type char: " + c); + } + } + return rowTypes; + } + + protected static DataChangeEvent genInsert(TableId tableId, String rowType, Object... 
fields) { + return DataChangeEvent.insertEvent(tableId, genBinRec(rowType, fields)); + } + + protected static StreamRecord wrap(Event payload) { + return wrap(payload, 0, 0); + } + + protected static StreamRecord wrap(Event payload, int from, int to) { + return new StreamRecord<>(PartitioningEvent.ofReducible(payload, from, to)); + } +} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouterTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouterTest.java new file mode 100644 index 00000000000..a6d6c89a2d3 --- /dev/null +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/TableIdRouterTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.runtime.operators.schema.common; + +import org.apache.flink.cdc.common.event.TableId; + +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** Unit test for {@link TableIdRouter}. 
*/ +public class TableIdRouterTest extends SchemaTestBase { + + private static List testRoute(String tableId) { + return TABLE_ID_ROUTER.route(TableId.parse(tableId)).stream() + .map(TableId::toString) + .collect(Collectors.toList()); + } + + @Test + void testImplicitRoute() { + assertThat(testRoute("db_0.table_1")).containsExactlyInAnyOrder("db_0.table_1"); + assertThat(testRoute("db_0.table_2")).containsExactlyInAnyOrder("db_0.table_2"); + assertThat(testRoute("db_0.table_3")).containsExactlyInAnyOrder("db_0.table_3"); + } + + @Test + void testOneToOneRoute() { + assertThat(testRoute("db_1.table_1")).containsExactlyInAnyOrder("db_1.table_1"); + assertThat(testRoute("db_1.table_2")).containsExactlyInAnyOrder("db_1.table_2"); + assertThat(testRoute("db_1.table_3")).containsExactlyInAnyOrder("db_1.table_3"); + } + + @Test + void testTwistedOneToOneRoute() { + assertThat(testRoute("db_2.table_1")).containsExactlyInAnyOrder("db_2.table_2"); + assertThat(testRoute("db_2.table_2")).containsExactlyInAnyOrder("db_2.table_3"); + assertThat(testRoute("db_2.table_3")).containsExactlyInAnyOrder("db_2.table_1"); + } + + @Test + void testMergingTablesRoute() { + assertThat(testRoute("db_3.table_1")).containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(testRoute("db_3.table_2")).containsExactlyInAnyOrder("db_3.table_merged"); + assertThat(testRoute("db_3.table_3")).containsExactlyInAnyOrder("db_3.table_merged"); + } + + @Test + void testBroadcastingRoute() { + assertThat(testRoute("db_4.table_1")) + .containsExactlyInAnyOrder("db_4.table_a", "db_4.table_b", "db_4.table_c"); + assertThat(testRoute("db_4.table_2")) + .containsExactlyInAnyOrder("db_4.table_b", "db_4.table_c"); + assertThat(testRoute("db_4.table_3")).containsExactlyInAnyOrder("db_4.table_c"); + } + + @Test + void testRepSymRoute() { + assertThat(testRoute("db_5.table_1")) + .containsExactlyInAnyOrder("db_5.prefix_table_1_suffix"); + assertThat(testRoute("db_5.table_2")) + .containsExactlyInAnyOrder("db_5.prefix_table_2_suffix"); + assertThat(testRoute("db_5.table_3")) + .containsExactlyInAnyOrder("db_5.prefix_table_3_suffix"); + } +} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivationTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivationTest.java deleted file mode 100644 index 6b7cc5d61ac..00000000000 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/coordinator/SchemaDerivationTest.java +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
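// Note on the replacement-symbol ("RepSym") routes exercised by testRepSymRoute above: when a
// RouteRule is constructed with a third argument (here "<>"), occurrences of that symbol in the
// sink pattern are filled in with the matched source table name, so "db_5.table_1" is routed to
// "db_5.prefix_table_1_suffix". The sketch below only illustrates that substitution step under
// this assumption; it is not the actual TableIdRouter implementation, and the class and method
// names are made up for illustration.
class ReplaceSymbolSketch {
    // e.g. fillSinkPattern("db_5.prefix_<>_suffix", "<>", "table_1") -> "db_5.prefix_table_1_suffix"
    static String fillSinkPattern(String sinkPattern, String replaceSymbol, String sourceTableName) {
        return sinkPattern.replace(replaceSymbol, sourceTableName);
    }

    public static void main(String[] args) {
        // Mirrors the expectations asserted in testRepSymRoute.
        System.out.println(fillSinkPattern("db_5.prefix_<>_suffix", "<>", "table_1"));
        System.out.println(fillSinkPattern("db_5.prefix_<>_suffix", "<>", "table_2"));
        System.out.println(fillSinkPattern("db_5.prefix_<>_suffix", "<>", "table_3"));
    }
}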
- */ - -package org.apache.flink.cdc.runtime.operators.schema.coordinator; - -import org.apache.flink.cdc.common.event.AddColumnEvent; -import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; -import org.apache.flink.cdc.common.event.CreateTableEvent; -import org.apache.flink.cdc.common.event.DropColumnEvent; -import org.apache.flink.cdc.common.event.RenameColumnEvent; -import org.apache.flink.cdc.common.event.SchemaChangeEvent; -import org.apache.flink.cdc.common.event.TableId; -import org.apache.flink.cdc.common.route.RouteRule; -import org.apache.flink.cdc.common.schema.Column; -import org.apache.flink.cdc.common.schema.PhysicalColumn; -import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.common.types.DataType; -import org.apache.flink.cdc.common.types.DataTypes; - -import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; - -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static org.apache.flink.cdc.common.testutils.assertions.EventAssertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -/** Unit test for {@link SchemaDerivation}. */ -class SchemaDerivationTest { - - private static final TableId TABLE_1 = TableId.tableId("mydb", "myschema", "mytable1"); - private static final TableId TABLE_2 = TableId.tableId("mydb", "myschema", "mytable2"); - private static final TableId MERGED_TABLE = TableId.tableId("mydb", "myschema", "mytables"); - - private static final Schema SCHEMA = - Schema.newBuilder() - .column(Column.physicalColumn("id", DataTypes.BIGINT())) - .column(Column.physicalColumn("name", DataTypes.STRING())) - .column(Column.physicalColumn("age", DataTypes.INT())) - .build(); - - private static final Schema COMPATIBLE_SCHEMA = - Schema.newBuilder() - .column(Column.physicalColumn("id", DataTypes.BIGINT())) - .column(Column.physicalColumn("name", DataTypes.STRING())) - .column(Column.physicalColumn("age", DataTypes.BIGINT())) - .column(Column.physicalColumn("gender", DataTypes.STRING())) - .build(); - - private static final Schema INCOMPATIBLE_SCHEMA = - Schema.newBuilder() - .column(Column.physicalColumn("id", DataTypes.BIGINT())) - .column(Column.physicalColumn("name", DataTypes.STRING())) - .column(Column.physicalColumn("age", DataTypes.STRING())) - .column(Column.physicalColumn("gender", DataTypes.STRING())) - .build(); - - private static final List ROUTES = - Collections.singletonList( - new RouteRule("mydb.myschema.mytable[0-9]", MERGED_TABLE.toString(), null)); - - @Test - void testOneToOneMapping() { - SchemaDerivation schemaDerivation = - new SchemaDerivation(new SchemaManager(), ROUTES, new HashMap<>()); - - // Create table - List derivedChangesAfterCreateTable = - schemaDerivation.applySchemaChange(new CreateTableEvent(TABLE_1, SCHEMA)); - assertThat(derivedChangesAfterCreateTable).hasSize(1); - assertThat(derivedChangesAfterCreateTable.get(0)) - .asCreateTableEvent() - .hasTableId(MERGED_TABLE) - .hasSchema(SCHEMA); - - // Add column - AddColumnEvent.ColumnWithPosition newCol1 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col1", DataTypes.STRING(), null)); - AddColumnEvent.ColumnWithPosition newCol2 = - new 
AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col2", DataTypes.STRING(), null)); - AddColumnEvent.ColumnWithPosition newCol3 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col3", DataTypes.STRING(), null, "abc")); - List newColumns = - Arrays.asList(newCol1, newCol2, newCol3); - List derivedChangesAfterAddColumn = - schemaDerivation.applySchemaChange(new AddColumnEvent(TABLE_1, newColumns)); - assertThat(derivedChangesAfterAddColumn).hasSize(1); - assertThat(derivedChangesAfterAddColumn.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns(newCol1, newCol2, newCol3); - - // Alter column type - ImmutableMap typeMapping = ImmutableMap.of("age", DataTypes.BIGINT()); - List derivedChangesAfterAlterTableType = - schemaDerivation.applySchemaChange(new AlterColumnTypeEvent(TABLE_1, typeMapping)); - assertThat(derivedChangesAfterAlterTableType).hasSize(1); - assertThat(derivedChangesAfterAlterTableType.get(0)) - .asAlterColumnTypeEvent() - .hasTableId(MERGED_TABLE) - .containsTypeMapping(typeMapping); - - // Drop column - List droppedColumns = Arrays.asList("new_col1", "new_col2"); - List derivedChangesAfterDropColumn = - schemaDerivation.applySchemaChange(new DropColumnEvent(TABLE_1, droppedColumns)); - assertThat(derivedChangesAfterDropColumn).hasSize(1); - assertThat(derivedChangesAfterDropColumn.get(0)) - .asDropColumnEvent() - .hasTableId(MERGED_TABLE) - .containsDroppedColumns("new_col1", "new_col2"); - - // Rename column - Map renamedColumns = ImmutableMap.of("name", "last_name"); - List derivedChangesAfterRenameColumn = - schemaDerivation.applySchemaChange(new RenameColumnEvent(TABLE_1, renamedColumns)); - assertThat(derivedChangesAfterRenameColumn).hasSize(1); - assertThat(derivedChangesAfterRenameColumn.get(0)) - .asRenameColumnEvent() - .hasTableId(MERGED_TABLE) - .containsNameMapping(renamedColumns); - } - - @Test - void testMergingTablesWithExactSameSchema() { - SchemaManager schemaManager = new SchemaManager(); - SchemaDerivation schemaDerivation = - new SchemaDerivation(schemaManager, ROUTES, new HashMap<>()); - - // Create table 1 - List derivedChangesAfterCreateTable = - schemaDerivation.applySchemaChange(new CreateTableEvent(TABLE_1, SCHEMA)); - assertThat(derivedChangesAfterCreateTable).hasSize(1); - assertThat(derivedChangesAfterCreateTable.get(0)) - .asCreateTableEvent() - .hasTableId(MERGED_TABLE) - .hasSchema(SCHEMA); - derivedChangesAfterCreateTable.forEach(schemaManager::applyEvolvedSchemaChange); - - // Create table 2 - assertThat(schemaDerivation.applySchemaChange(new CreateTableEvent(TABLE_2, SCHEMA))) - .isEmpty(); - - // Add column for table 1 - AddColumnEvent.ColumnWithPosition newCol1 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col1", DataTypes.STRING(), null)); - AddColumnEvent.ColumnWithPosition newCol2 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col2", DataTypes.STRING(), null)); - List newColumns = Arrays.asList(newCol1, newCol2); - List derivedChangesAfterAddColumn = - schemaDerivation.applySchemaChange(new AddColumnEvent(TABLE_1, newColumns)); - assertThat(derivedChangesAfterAddColumn).hasSize(1); - assertThat(derivedChangesAfterAddColumn.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns(newCol1, newCol2); - derivedChangesAfterAddColumn.forEach(schemaManager::applyEvolvedSchemaChange); - - // Add column for table 2 - assertThat(schemaDerivation.applySchemaChange(new AddColumnEvent(TABLE_2, newColumns))) - 
.isEmpty(); - - // Alter column type for table 1 - ImmutableMap typeMapping = ImmutableMap.of("age", DataTypes.BIGINT()); - List derivedChangesAfterAlterColumnType = - schemaDerivation.applySchemaChange(new AlterColumnTypeEvent(TABLE_1, typeMapping)); - assertThat(derivedChangesAfterAlterColumnType).hasSize(1); - assertThat(derivedChangesAfterAlterColumnType.get(0)) - .asAlterColumnTypeEvent() - .hasTableId(MERGED_TABLE) - .containsTypeMapping(typeMapping); - derivedChangesAfterAlterColumnType.forEach(schemaManager::applyEvolvedSchemaChange); - - // Alter column type for table 2 - assertThat( - schemaDerivation.applySchemaChange( - new AlterColumnTypeEvent(TABLE_2, typeMapping))) - .isEmpty(); - - // Drop column for table 1 - List droppedColumns = Arrays.asList("new_col1", "new_col2"); - assertThat(schemaDerivation.applySchemaChange(new DropColumnEvent(TABLE_1, droppedColumns))) - .isEmpty(); - // Drop column for table 2 - assertThat(schemaDerivation.applySchemaChange(new DropColumnEvent(TABLE_2, droppedColumns))) - .isEmpty(); - - // Rename column for table 1 - Map renamedColumns = ImmutableMap.of("name", "last_name"); - List derivedChangesAfterRenameColumn = - schemaDerivation.applySchemaChange(new RenameColumnEvent(TABLE_1, renamedColumns)); - assertThat(derivedChangesAfterRenameColumn).hasSize(1); - assertThat(derivedChangesAfterRenameColumn.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("last_name", DataTypes.STRING(), null))); - derivedChangesAfterRenameColumn.forEach(schemaManager::applyEvolvedSchemaChange); - - // Rename column for table 2 - assertThat( - schemaDerivation.applySchemaChange( - new RenameColumnEvent(TABLE_2, renamedColumns))) - .isEmpty(); - } - - @Test - void testMergingTableWithDifferentSchemas() { - SchemaManager schemaManager = new SchemaManager(); - SchemaDerivation schemaDerivation = - new SchemaDerivation(schemaManager, ROUTES, new HashMap<>()); - // Create table 1 - List derivedChangesAfterCreateTable = - schemaDerivation.applySchemaChange(new CreateTableEvent(TABLE_1, SCHEMA)); - assertThat(derivedChangesAfterCreateTable).hasSize(1); - assertThat(derivedChangesAfterCreateTable.get(0)) - .asCreateTableEvent() - .hasTableId(MERGED_TABLE) - .hasSchema(SCHEMA); - derivedChangesAfterCreateTable.forEach(schemaManager::applyEvolvedSchemaChange); - - // Create table 2 - List derivedChangesAfterCreateTable2 = - schemaDerivation.applySchemaChange( - new CreateTableEvent(TABLE_2, COMPATIBLE_SCHEMA)); - assertThat(derivedChangesAfterCreateTable2).hasSize(2); - assertThat(derivedChangesAfterCreateTable2) - .containsExactlyInAnyOrder( - new AddColumnEvent( - MERGED_TABLE, - Collections.singletonList( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn( - "gender", DataTypes.STRING(), null)))), - new AlterColumnTypeEvent( - MERGED_TABLE, ImmutableMap.of("age", DataTypes.BIGINT()))); - derivedChangesAfterCreateTable2.forEach(schemaManager::applyEvolvedSchemaChange); - - // Add column for table 1 - AddColumnEvent.ColumnWithPosition newCol1 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col1", DataTypes.VARCHAR(255), null)); - AddColumnEvent.ColumnWithPosition newCol2 = - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("new_col2", DataTypes.VARCHAR(255), null)); - List newColumns = Arrays.asList(newCol1, newCol2); - List derivedChangesAfterAddColumn = - schemaDerivation.applySchemaChange(new AddColumnEvent(TABLE_1, newColumns)); - 
assertThat(derivedChangesAfterAddColumn).hasSize(1); - assertThat(derivedChangesAfterAddColumn.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns(newCol1, newCol2); - derivedChangesAfterAddColumn.forEach(schemaManager::applyEvolvedSchemaChange); - - // Add column for table 2 - List derivedChangesAfterAddColumnForTable2 = - schemaDerivation.applySchemaChange( - new AddColumnEvent( - TABLE_2, - Arrays.asList( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn( - "new_col1", DataTypes.STRING(), null)), - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn( - "new_col2", DataTypes.STRING(), null))))); - assertThat(derivedChangesAfterAddColumnForTable2).hasSize(1); - assertThat(derivedChangesAfterAddColumnForTable2.get(0)) - .asAlterColumnTypeEvent() - .containsTypeMapping( - ImmutableMap.of( - "new_col1", DataTypes.STRING(), "new_col2", DataTypes.STRING())); - derivedChangesAfterAddColumnForTable2.forEach(schemaManager::applyEvolvedSchemaChange); - - // Alter column type for table 1 - ImmutableMap typeMapping = ImmutableMap.of("age", DataTypes.BIGINT()); - List derivedChangesAfterAlterColumnType = - schemaDerivation.applySchemaChange(new AlterColumnTypeEvent(TABLE_1, typeMapping)); - assertThat(derivedChangesAfterAlterColumnType).isEmpty(); - // Alter column type for table 2 - List derivedChangesAfterAlterColumnTypeForTable2 = - schemaDerivation.applySchemaChange( - new AlterColumnTypeEvent( - TABLE_2, ImmutableMap.of("age", DataTypes.TINYINT()))); - assertThat(derivedChangesAfterAlterColumnTypeForTable2).isEmpty(); - - // Drop column for table 1 - List droppedColumns = Arrays.asList("new_col1", "new_col2"); - assertThat(schemaDerivation.applySchemaChange(new DropColumnEvent(TABLE_1, droppedColumns))) - .isEmpty(); - // Drop column for table 2 - assertThat(schemaDerivation.applySchemaChange(new DropColumnEvent(TABLE_2, droppedColumns))) - .isEmpty(); - - // Rename column for table 1 - Map renamedColumns = ImmutableMap.of("name", "last_name"); - List derivedChangesAfterRenameColumn = - schemaDerivation.applySchemaChange(new RenameColumnEvent(TABLE_1, renamedColumns)); - assertThat(derivedChangesAfterRenameColumn).hasSize(1); - assertThat(derivedChangesAfterRenameColumn.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("last_name", DataTypes.STRING(), null))); - derivedChangesAfterRenameColumn.forEach(schemaManager::applyEvolvedSchemaChange); - - // Rename column for table 2 - List derivedChangesAfterRenameColumnForTable2 = - schemaDerivation.applySchemaChange( - new RenameColumnEvent(TABLE_2, ImmutableMap.of("name", "first_name"))); - assertThat(derivedChangesAfterRenameColumnForTable2).hasSize(1); - assertThat(derivedChangesAfterRenameColumnForTable2.get(0)) - .asAddColumnEvent() - .hasTableId(MERGED_TABLE) - .containsAddedColumns( - new AddColumnEvent.ColumnWithPosition( - new PhysicalColumn("first_name", DataTypes.STRING(), null))); - derivedChangesAfterRenameColumnForTable2.forEach(schemaManager::applyEvolvedSchemaChange); - - assertThat(schemaManager.getLatestEvolvedSchema(MERGED_TABLE)) - .contains( - Schema.newBuilder() - .column(Column.physicalColumn("id", DataTypes.BIGINT())) - .column(Column.physicalColumn("name", DataTypes.STRING())) - .column(Column.physicalColumn("age", DataTypes.BIGINT())) - .column(Column.physicalColumn("gender", DataTypes.STRING())) - .column(Column.physicalColumn("new_col1", DataTypes.STRING())) - 
.column(Column.physicalColumn("new_col2", DataTypes.STRING())) - .column(Column.physicalColumn("last_name", DataTypes.STRING())) - .column(Column.physicalColumn("first_name", DataTypes.STRING())) - .build()); - } - - @Test - void testIncompatibleTypes() { - SchemaManager schemaManager = new SchemaManager(); - SchemaDerivation schemaDerivation = - new SchemaDerivation(schemaManager, ROUTES, new HashMap<>()); - // Create table 1 - List derivedChangesAfterCreateTable = - schemaDerivation.applySchemaChange(new CreateTableEvent(TABLE_1, SCHEMA)); - assertThat(derivedChangesAfterCreateTable).hasSize(1); - assertThat(derivedChangesAfterCreateTable.get(0)) - .asCreateTableEvent() - .hasTableId(MERGED_TABLE) - .hasSchema(SCHEMA); - derivedChangesAfterCreateTable.forEach(schemaManager::applyEvolvedSchemaChange); - - // Create table 2 - assertThatThrownBy( - () -> - schemaDerivation.applySchemaChange( - new CreateTableEvent(TABLE_2, INCOMPATIBLE_SCHEMA))) - .isInstanceOf(IllegalStateException.class) - .hasMessage("Incompatible types found for column `age`: \"INT\" and \"STRING\""); - } - - @Test - void testSerde() throws Exception { - Map> derivationMapping = new HashMap<>(); - Set originalTableIds = new HashSet<>(); - originalTableIds.add(TABLE_1); - originalTableIds.add(TABLE_2); - derivationMapping.put(MERGED_TABLE, originalTableIds); - SchemaDerivation schemaDerivation = - new SchemaDerivation(new SchemaManager(), ROUTES, derivationMapping); - try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(baos)) { - SchemaDerivation.serializeDerivationMapping(schemaDerivation, out); - byte[] serialized = baos.toByteArray(); - try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); - DataInputStream in = new DataInputStream(bais)) { - Map> deserialized = - SchemaDerivation.deserializerDerivationMapping(in); - assertThat(deserialized).isEqualTo(derivationMapping); - } - } - } -} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaMapperTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaMapperTest.java new file mode 100644 index 00000000000..9acf00d0c6a --- /dev/null +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/distributed/SchemaMapperTest.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.runtime.operators.schema.distributed; + +import org.apache.flink.cdc.common.event.AddColumnEvent; +import org.apache.flink.cdc.common.event.AlterColumnTypeEvent; +import org.apache.flink.cdc.common.event.CreateTableEvent; +import org.apache.flink.cdc.common.event.DropColumnEvent; +import org.apache.flink.cdc.common.event.DropTableEvent; +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.FlushEvent; +import org.apache.flink.cdc.common.event.RenameColumnEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.event.TruncateTableEvent; +import org.apache.flink.cdc.common.schema.Column; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaTestBase; +import org.apache.flink.cdc.runtime.testutils.operators.DistributedEventOperatorTestHarness; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.function.BiConsumerWithException; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.LinkedList; +import java.util.function.Function; +import java.util.function.Supplier; + +/** Unit test cases for {@link SchemaOperator}. */ +public class SchemaMapperTest extends SchemaTestBase { + private static final TableId TABLE_ID = TableId.parse("foo.bar.baz"); + private static final Schema INITIAL_SCHEMA = + Schema.newBuilder() + .physicalColumn("id", DataTypes.INT().notNull()) + .physicalColumn("name", DataTypes.VARCHAR(128)) + .physicalColumn("age", DataTypes.FLOAT()) + .physicalColumn("notes", DataTypes.STRING().notNull()) + .build(); + + @Test + void testSchemaEvolution() throws Exception { + CreateTableEvent createTableEvent = new CreateTableEvent(TABLE_ID, INITIAL_SCHEMA); + AddColumnEvent addColumnEvent = + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.AFTER, + "id"))); + AddColumnEvent addColumnEventAtLast = + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("added_flag", DataTypes.BOOLEAN()), + AddColumnEvent.ColumnPosition.LAST, + null))); + + RenameColumnEvent renameColumnEvent = + new RenameColumnEvent(TABLE_ID, Collections.singletonMap("notes", "footnotes")); + AddColumnEvent appendRenamedColumnAtLast = + new AddColumnEvent( + TABLE_ID, + Collections.singletonList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("footnotes", DataTypes.STRING()), + AddColumnEvent.ColumnPosition.LAST, + null))); + AlterColumnTypeEvent alterColumnTypeEvent = + new AlterColumnTypeEvent( + TABLE_ID, Collections.singletonMap("age", DataTypes.DOUBLE())); + AlterColumnTypeEvent alterColumnTypeEventWithBackfill = + new AlterColumnTypeEvent( + TABLE_ID, + Collections.singletonMap("age", DataTypes.DOUBLE()), + Collections.singletonMap("age", DataTypes.FLOAT())); + + DropColumnEvent dropColumnEvent = + new DropColumnEvent(TABLE_ID, Collections.singletonList("footnotes")); + TruncateTableEvent truncateTableEvent = new TruncateTableEvent(TABLE_ID); + DropTableEvent dropTableEvent = new DropTableEvent(TABLE_ID); + + Assertions.assertThat( + 
runInHarness( + () -> + new SchemaOperator( + ROUTING_RULES, Duration.ofMinutes(3), "UTC"), + (op) -> + new DistributedEventOperatorTestHarness<>( + op, + 20, + Duration.ofSeconds(3), + Duration.ofMinutes(3)), + (operator, harness) -> { + + // Create a Table + operator.processElement(wrap(createTableEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, "ISFS", 1, "Alice", 17.1828f, + "Hello"))); + + // Add a Column + operator.processElement(wrap(addColumnEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, + "IBSFS", + 2, + false, + "Bob", + 31.415926f, + "Bye-bye"))); + + // Rename a Column + operator.processElement(wrap(renameColumnEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, "IBSFS", 3, true, "Cicada", + 123.456f, "Ok"))); + + // Alter a Column's Type + operator.processElement(wrap(alterColumnTypeEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, + "IBSDS", + 4, + false, + "Derrida", + 7.81876754837, + "Nah"))); + + // Drop a column + operator.processElement(wrap(dropColumnEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, "IBSD", 5, true, "Eve", + 1.414))); + + // Truncate a table + operator.processElement(wrap(truncateTableEvent)); + operator.processElement( + wrap( + genInsert( + TABLE_ID, "IBSD", 6, false, "Ferris", + 0.001))); + + // Drop a table + operator.processElement(wrap(dropTableEvent)); + })) + .map(StreamRecord::getValue) + .containsExactly( + FlushEvent.ofAll(), + createTableEvent, + genInsert(TABLE_ID, "ISFS", 1, "Alice", 17.1828f, "Hello"), + FlushEvent.ofAll(), + addColumnEventAtLast, + genInsert(TABLE_ID, "ISFSB", 2, "Bob", 31.415926f, "Bye-bye", false), + FlushEvent.ofAll(), + appendRenamedColumnAtLast, + genInsert(TABLE_ID, "ISFSBS", 3, "Cicada", 123.456f, null, true, "Ok"), + FlushEvent.ofAll(), + alterColumnTypeEventWithBackfill, + genInsert( + TABLE_ID, + "ISDSBS", + 4, + "Derrida", + 7.81876754837, + null, + false, + "Nah"), + FlushEvent.ofAll(), + genInsert(TABLE_ID, "ISDSBS", 5, "Eve", 1.414, null, true, null), + FlushEvent.ofAll(), + genInsert(TABLE_ID, "ISDSBS", 6, "Ferris", 0.001, null, false, null), + FlushEvent.ofAll()); + } + + protected static , E extends Event, T extends Throwable> + LinkedList> runInHarness( + Supplier opCreator, + Function> harnessCreator, + BiConsumerWithException, T> + closure) + throws T, Exception { + OP operator = opCreator.get(); + try (DistributedEventOperatorTestHarness harness = harnessCreator.apply(operator)) { + harness.open(); + closure.accept(operator, harness); + return harness.getOutputRecords(); + } + } +} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaEvolveTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaEvolveTest.java similarity index 96% rename from flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaEvolveTest.java rename to flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaEvolveTest.java index d60b6d55a9d..f2798678804 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaEvolveTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaEvolveTest.java @@ -15,7 +15,7 @@ * limitations under the License. 
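// Note on the compact row-type strings ("ISFS", "IBSFS", "IBSD", ...) passed to genInsert in
// SchemaMapperTest above: SchemaTestBase#quickGenRow maps each character to a data type
// ('i' = INT, 'b' = BOOLEAN, 'l' = BIGINT, 'f' = FLOAT, 'd' = DOUBLE, 's' = STRING,
// case-insensitive). For example, "IBSFS" describes a row of (INT, BOOLEAN, STRING, FLOAT, STRING),
// so genInsert(TABLE_ID, "IBSFS", 2, false, "Bob", 31.415926f, "Bye-bye") produces an insert with
// that layout, matching the schema after added_flag is appended. This is a descriptive note only;
// the mapping itself lives in quickGenRow.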
*/ -package org.apache.flink.cdc.runtime.operators.schema; +package org.apache.flink.cdc.runtime.operators.schema.regular; import org.apache.flink.cdc.common.data.RecordData; import org.apache.flink.cdc.common.data.binary.BinaryStringData; @@ -29,6 +29,7 @@ import org.apache.flink.cdc.common.event.SchemaChangeEventType; import org.apache.flink.cdc.common.event.SchemaChangeEventTypeFamily; import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.exceptions.SchemaEvolveException; import org.apache.flink.cdc.common.exceptions.UnsupportedSchemaChangeEventException; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.route.RouteRule; @@ -37,10 +38,11 @@ import org.apache.flink.cdc.common.types.DataType; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.cdc.runtime.typeutils.NonceUtils; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.FlinkRuntimeException; import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; import org.apache.flink.shaded.guava31.com.google.common.collect.Sets; @@ -54,6 +56,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; /** Unit tests for the {@link SchemaOperator} to handle evolved schema. */ @@ -106,8 +109,9 @@ public void testEvolveSchema() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent @@ -376,8 +380,9 @@ public void testTryEvolveSchema() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent @@ -646,8 +651,9 @@ public void testExceptionEvolveSchema() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent @@ -700,45 +706,25 @@ public void testExceptionEvolveSchema() throws Exception { new AddColumnEvent.ColumnWithPosition( Column.physicalColumn( "height", DOUBLE, "Height data"))))); - Assertions.assertThatThrownBy(() -> processEvent(schemaOperator, addColumnEvents)); + Assertions.assertThatThrownBy(() -> 
processEvent(schemaOperator, addColumnEvents)) + .isExactlyInstanceOf(IllegalStateException.class) + .cause() + .isExactlyInstanceOf(ExecutionException.class) + .cause() + .isExactlyInstanceOf(FlinkRuntimeException.class) + .hasMessage("Failed to apply schema change event.") + .cause() + .isExactlyInstanceOf(SchemaEvolveException.class) + .extracting("applyingEvent", "exceptionMessage") + .containsExactly( + addColumnEvents.get(0), + "Unexpected schema change events occurred in EXCEPTION mode. Job will fail now."); // No schema change events should be sent to downstream - Assertions.assertThat(harness.getOutputRecords()).isEmpty(); - } - - // Test RenameColumnEvent (expected to fail) - { - List addColumnEvents = - Collections.singletonList( - new RenameColumnEvent( - tableId, ImmutableMap.of("name", "namae", "age", "toshi"))); - Assertions.assertThatThrownBy(() -> processEvent(schemaOperator, addColumnEvents)); - - // No schema change events should be sent to downstream - Assertions.assertThat(harness.getOutputRecords()).isEmpty(); - } - - // Test AlterColumnTypeEvent (expected to fail) - { - List addColumnEvents = - Collections.singletonList( - new AlterColumnTypeEvent( - tableId, ImmutableMap.of("score", BIGINT, "toshi", FLOAT))); - Assertions.assertThatThrownBy(() -> processEvent(schemaOperator, addColumnEvents)); - - // No schema change events should be sent to downstream - Assertions.assertThat(harness.getOutputRecords()).isEmpty(); - } - - // Test DropColumnEvent (expected to fail) - { - List addColumnEvents = - Collections.singletonList( - new DropColumnEvent(tableId, Arrays.asList("score", "height"))); - Assertions.assertThatThrownBy(() -> processEvent(schemaOperator, addColumnEvents)); - - // No schema change events should be sent to downstream - Assertions.assertThat(harness.getOutputRecords()).isEmpty(); + Assertions.assertThat(harness.getOutputRecords()) + .map(StreamRecord::getValue) + .map(e -> e.getClass().getName()) + .containsExactly("org.apache.flink.cdc.common.event.FlushEvent"); } harness.close(); @@ -760,8 +746,9 @@ public void testIgnoreEvolveSchema() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent @@ -1045,8 +1032,8 @@ public void testEvolveSchemaWithFailure() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>( + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndFineGrainedBehaviorWithError( schemaOperator, 17, Duration.ofSeconds(3), @@ -1100,7 +1087,19 @@ tableId, buildRecord(INT, 2, STRING, "Bob", SMALLINT, (short) 18)), new AddColumnEvent.ColumnWithPosition( Column.physicalColumn( "height", DOUBLE, "Height data"))))); - processEvent(schemaOperator, addColumnEvents); + Assertions.assertThatThrownBy(() -> processEvent(schemaOperator, addColumnEvents)) + .isExactlyInstanceOf(IllegalStateException.class) + .cause() + .isExactlyInstanceOf(ExecutionException.class) + .cause() + .isExactlyInstanceOf(FlinkRuntimeException.class) + .hasMessage("Failed to apply schema change event.") + 
.cause() + .isExactlyInstanceOf(UnsupportedSchemaChangeEventException.class) + .extracting("applyingEvent", "exceptionMessage") + .containsExactly( + addColumnEvents.get(0), "Sink doesn't support such schema change event."); + Assertions.assertThat(harness.isJobFailed()).isEqualTo(true); Assertions.assertThat(harness.getJobFailureCause()) .cause() @@ -1132,8 +1131,8 @@ public void testTryEvolveSchemaWithFailure() throws Exception { // All types of schema change events will be sent to the sink // AddColumn and RenameColumn events will always fail - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>( + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndFineGrainedBehaviorWithError( schemaOperator, 17, Duration.ofSeconds(3), @@ -1452,8 +1451,8 @@ public void testFineGrainedSchemaEvolves() throws Exception { // All types of schema change events will be sent to the sink // AddColumn and RenameColumn events will always fail - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>( + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndFineGrainedBehavior( schemaOperator, 17, Duration.ofSeconds(3), @@ -1770,8 +1769,9 @@ public void testLenientSchemaEvolves() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent @@ -2156,8 +2156,9 @@ public void testLenientEvolveTweaks() throws Exception { SchemaOperator schemaOperator = new MockedSchemaOperator(new ArrayList<>(), Duration.ofSeconds(30), behavior); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 17, Duration.ofSeconds(3), behavior); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDurationAndBehavior( + schemaOperator, 17, Duration.ofSeconds(3), behavior); harness.open(); // Test CreateTableEvent diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorTest.java similarity index 73% rename from flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorTest.java rename to flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorTest.java index eb45a253113..9feaf29d3bf 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/SchemaOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/regular/SchemaOperatorTest.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.flink.cdc.runtime.operators.schema; +package org.apache.flink.cdc.runtime.operators.schema.regular; import org.apache.flink.cdc.common.data.binary.BinaryStringData; import org.apache.flink.cdc.common.event.CreateTableEvent; @@ -26,13 +26,13 @@ import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; import org.apache.flink.cdc.runtime.serializer.event.EventSerializer; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.time.Duration; @@ -46,6 +46,8 @@ import java.util.stream.Collectors; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatCode; /** Unit tests for the {@link SchemaOperator}. */ public class SchemaOperatorTest { @@ -67,14 +69,21 @@ void testProcessElement() throws Exception { final OperatorID opID = new OperatorID(); final TableId tableId = TableId.tableId("testProcessElement"); final RowType rowType = DataTypes.ROW(DataTypes.BIGINT(), DataTypes.STRING()); + final Schema schema = + Schema.newBuilder() + .physicalColumn("id", DataTypes.BIGINT()) + .physicalColumn("name", DataTypes.STRING()) + .build(); List> testHarnesses = new ArrayList<>(); for (int subtaskIndex = 0; subtaskIndex < parallelism; subtaskIndex++) { + SchemaOperator operator = new SchemaOperator(new ArrayList<>()); OneInputStreamOperatorTestHarness testHarness = - createTestHarness(maxParallelism, parallelism, subtaskIndex, opID); + createTestHarness(maxParallelism, parallelism, subtaskIndex, opID, operator); testHarnesses.add(testHarness); testHarness.setup(EventSerializer.INSTANCE); testHarness.open(); + operator.registerInitialSchema(tableId, schema); Map meta = new HashMap<>(); meta.put("subtask", String.valueOf(subtaskIndex)); @@ -114,15 +123,18 @@ void testProcessElement() throws Exception { void testProcessSchemaChangeEventWithTimeOut() throws Exception { SchemaOperator schemaOperator = new SchemaOperator(new ArrayList<>(), Duration.ofSeconds(1)); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 1, Duration.ofSeconds(3)); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDuration( + schemaOperator, 1, Duration.ofSeconds(3)); harness.open(); - Assertions.assertThrowsExactly( - TimeoutException.class, - () -> - schemaOperator.processElement( - new StreamRecord<>( - new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA)))); + assertThatThrownBy( + () -> + schemaOperator.processElement( + new StreamRecord<>( + new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA)))) + .isExactlyInstanceOf(IllegalStateException.class) + .cause() + .isExactlyInstanceOf(TimeoutException.class); harness.close(); } @@ -130,22 +142,28 @@ void testProcessSchemaChangeEventWithTimeOut() throws Exception { void testProcessSchemaChangeEventWithOutTimeOut() throws Exception { SchemaOperator schemaOperator = new SchemaOperator(new 
ArrayList<>(), Duration.ofSeconds(30)); - EventOperatorTestHarness harness = - new EventOperatorTestHarness<>(schemaOperator, 1, Duration.ofSeconds(3)); + RegularEventOperatorTestHarness harness = + RegularEventOperatorTestHarness.withDuration( + schemaOperator, 1, Duration.ofSeconds(3)); harness.open(); - Assertions.assertDoesNotThrow( - () -> - schemaOperator.processElement( - new StreamRecord<>( - new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA)))); + assertThatCode( + () -> + schemaOperator.processElement( + new StreamRecord<>( + new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA)))) + .doesNotThrowAnyException(); harness.close(); } private OneInputStreamOperatorTestHarness createTestHarness( - int maxParallelism, int parallelism, int subtaskIndex, OperatorID opID) + int maxParallelism, + int parallelism, + int subtaskIndex, + OperatorID opID, + OneInputStreamOperator operator) throws Exception { return new OneInputStreamOperatorTestHarness<>( - new SchemaOperator(new ArrayList<>()), + operator, maxParallelism, parallelism, subtaskIndex, diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java index 9e0e9843c33..78268f7cabd 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java @@ -27,7 +27,7 @@ import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; @@ -245,9 +245,9 @@ void testDataChangeEventTransform() throws Exception { "*, concat(col1,col2) col12", "col1 = '1'") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -322,6 +322,7 @@ void testDataChangeEventTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -337,9 +338,9 @@ void testDataChangeEventTransformTwice() throws Exception { "*, concat(col1, '2') col12", "col1 = '2'") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -427,6 +428,7 @@ void testDataChangeEventTransformTwice() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -435,9 +437,9 @@ void 
testDataChangeEventTransformProjectionDataTypeConvert() throws Exception { PostTransformOperator.newBuilder() .addTransform(DATATYPE_TABLEID.identifier(), "*", null, null, null, null) .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -473,6 +475,7 @@ void testDataChangeEventTransformProjectionDataTypeConvert() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEvent)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -484,9 +487,9 @@ void testMetadataTransform() throws Exception { "*, __namespace_name__ || '.' || __schema_name__ || '.' || __table_name__ identifier_name, __namespace_name__, __schema_name__, __table_name__", " __table_name__ = 'metadata_table' ") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -521,6 +524,7 @@ void testMetadataTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -532,9 +536,9 @@ void testMetadataASTransform() throws Exception { "sid, name, UPPER(name) as name_upper, __table_name__ as tbname", "sid < 3") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -568,6 +572,7 @@ void testMetadataASTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -579,9 +584,9 @@ void testDataChangeEventTransformWithDuplicateColumns() throws Exception { "col1, col2, col2 * col2 as square_col2", "col2 < 3 OR col2 > 5") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -629,6 +634,7 @@ void testDataChangeEventTransformWithDuplicateColumns() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isNull(); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -643,9 +649,9 @@ void testTimestampTransform() throws Exception { "LOCALTIMESTAMP = CAST(CURRENT_TIMESTAMP AS TIMESTAMP)") .addTimezone("UTC") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization 
transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -674,6 +680,7 @@ void testTimestampTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -696,9 +703,9 @@ void testTimestampDiffTransform() throws Exception { "col1='2'") .addTimezone("Asia/Shanghai") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -743,6 +750,7 @@ void testTimestampDiffTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect2)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -755,9 +763,9 @@ void testTimezoneTransform() throws Exception { null) .addTimezone("UTC") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -788,6 +796,7 @@ void testTimezoneTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -811,9 +820,9 @@ void testNullCastTransform() throws Exception { + ",cast(colString as TIMESTAMP(3)) as nullTimestamp", null) .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -848,6 +857,7 @@ void testNullCastTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEvent)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -1005,9 +1015,9 @@ void testCastTransform() throws Exception { + ",cast('1970-01-01T00:00:01.234' as TIMESTAMP(3)) as castTimestamp", "col1 = '10'") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -1420,6 +1430,7 @@ void testCastTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect10)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -1442,9 +1453,9 @@ void testCastErrorTransform() throws Exception { + ",cast(castFloat as TIMESTAMP(3)) as castTimestamp", "col1 = '1'") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + 
RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -1481,6 +1492,7 @@ void testCastErrorTransform() throws Exception { .isExactlyInstanceOf(RuntimeException.class) .hasRootCauseInstanceOf(NumberFormatException.class) .hasRootCauseMessage("For input string: \"1.0\""); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -1569,9 +1581,9 @@ private void testExpressionConditionTransform(String expression) throws Exceptio expression) .addTimezone("UTC") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -1600,6 +1612,7 @@ private void testExpressionConditionTransform(String expression) throws Exceptio Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(insertEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -1612,9 +1625,9 @@ public void testReduceSchemaTransform() throws Exception { "newage > 17 and ref2 > 17") .addTimezone("GMT") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -1702,6 +1715,7 @@ public void testReduceSchemaTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -1714,9 +1728,9 @@ public void testWildcardSchemaTransform() throws Exception { "newage > 17") .addTimezone("GMT") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -1798,5 +1812,6 @@ public void testWildcardSchemaTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java index 0e3553e8c46..9ebf31089e8 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java @@ -27,7 +27,7 @@ import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import 
org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; @@ -193,9 +193,9 @@ void testEventTransform() throws Exception { "col12", "key1=value1,key2=value2") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -289,6 +289,7 @@ void testEventTransform() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -303,9 +304,9 @@ public void testNullabilityColumn() throws Exception { "id", "key1=value1,key2=value2") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -319,6 +320,7 @@ public void testNullabilityColumn() throws Exception { new StreamRecord<>( new CreateTableEvent( CUSTOMERS_TABLEID, EXPECTED_NULLABILITY_SCHEMA))); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -333,9 +335,9 @@ public void testReduceTransformColumn() throws Exception { "id", "key1=value1,key2=value2") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -426,6 +428,7 @@ public void testReduceTransformColumn() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -440,9 +443,9 @@ public void testWildcardTransformColumn() throws Exception { "id", "key1=value1,key2=value2") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -516,6 +519,7 @@ public void testWildcardTransformColumn() throws Exception { Assertions.assertThat( transformFunctionEventEventOperatorTestHarness.getOutputRecords().poll()) .isEqualTo(new StreamRecord<>(updateEventExpect)); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -528,9 +532,9 @@ void testMetadataTransform() throws Exception { " __table_name__ = 'metadata_table' ") .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -542,6 +546,7 @@ void testMetadataTransform() throws Exception { .isEqualTo( new StreamRecord<>( new CreateTableEvent(METADATA_TABLEID, EXPECTED_METADATA_SCHEMA))); + 
transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -563,9 +568,9 @@ void testMultiTransformWithDiffRefColumns() throws Exception { null, null) .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -579,6 +584,7 @@ void testMultiTransformWithDiffRefColumns() throws Exception { new StreamRecord<>( new CreateTableEvent( CUSTOMERS_TABLEID, EXPECTED_MULTITRANSFORM_SCHEMA))); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -600,9 +606,9 @@ void testMultiTransformWithAsterisk() throws Exception { null, null) .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -615,6 +621,7 @@ void testMultiTransformWithAsterisk() throws Exception { .isEqualTo( new StreamRecord<>( new CreateTableEvent(CUSTOMERS_TABLEID, MULTITRANSFORM_SCHEMA))); + transformFunctionEventEventOperatorTestHarness.close(); } @Test @@ -631,9 +638,9 @@ void testMultiTransformMissingProjection() throws Exception { null, null) .build(); - EventOperatorTestHarness + RegularEventOperatorTestHarness transformFunctionEventEventOperatorTestHarness = - new EventOperatorTestHarness<>(transform, 1); + RegularEventOperatorTestHarness.with(transform, 1); // Initialization transformFunctionEventEventOperatorTestHarness.open(); // Create table @@ -646,5 +653,6 @@ void testMultiTransformMissingProjection() throws Exception { .isEqualTo( new StreamRecord<>( new CreateTableEvent(CUSTOMERS_TABLEID, MULTITRANSFORM_SCHEMA))); + transformFunctionEventEventOperatorTestHarness.close(); } } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/TransformOperatorWithSchemaEvolveTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/TransformOperatorWithSchemaEvolveTest.java index 7613c930b50..ffc6da850f2 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/TransformOperatorWithSchemaEvolveTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/TransformOperatorWithSchemaEvolveTest.java @@ -29,7 +29,7 @@ import org.apache.flink.cdc.common.schema.Column; import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; @@ -72,8 +72,10 @@ static class TransformWithSchemaEvolveTestCase { private PreTransformOperator preTransformOperator; private PostTransformOperator postTransformOperator; - private EventOperatorTestHarness preTransformOperatorHarness; - private EventOperatorTestHarness postTransformOperatorHarness; + private RegularEventOperatorTestHarness + preTransformOperatorHarness; + private RegularEventOperatorTestHarness + postTransformOperatorHarness; public static TransformWithSchemaEvolveTestCase of( 
TableId tableId, @@ -152,7 +154,8 @@ private TransformWithSchemaEvolveTestCase initializeHarness() throws Exception { .addTransform( tableId.identifier(), projectionExpression, filterExpression) .build(); - preTransformOperatorHarness = new EventOperatorTestHarness<>(preTransformOperator, 1); + preTransformOperatorHarness = + RegularEventOperatorTestHarness.with(preTransformOperator, 1); preTransformOperatorHarness.open(); postTransformOperator = @@ -160,7 +163,8 @@ private TransformWithSchemaEvolveTestCase initializeHarness() throws Exception { .addTransform( tableId.identifier(), projectionExpression, filterExpression) .build(); - postTransformOperatorHarness = new EventOperatorTestHarness<>(postTransformOperator, 1); + postTransformOperatorHarness = + RegularEventOperatorTestHarness.with(postTransformOperator, 1); postTransformOperatorHarness.open(); return this; } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/UnifiedTransformOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/UnifiedTransformOperatorTest.java index 43a364e5153..227f8c57181 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/UnifiedTransformOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/UnifiedTransformOperatorTest.java @@ -27,7 +27,7 @@ import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; import org.apache.flink.cdc.common.utils.SchemaUtils; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; @@ -72,8 +72,10 @@ static class UnifiedTransformTestCase { private final BinaryRecordDataGenerator preTransformedRecordGenerator; private final BinaryRecordDataGenerator postTransformedRecordGenerator; - private EventOperatorTestHarness preTransformOperatorHarness; - private EventOperatorTestHarness postTransformOperatorHarness; + private RegularEventOperatorTestHarness + preTransformOperatorHarness; + private RegularEventOperatorTestHarness + postTransformOperatorHarness; public static UnifiedTransformTestCase of( TableId tableId, @@ -241,7 +243,8 @@ private UnifiedTransformTestCase initializeHarness() throws Exception { .addTransform( tableId.identifier(), projectionExpression, filterExpression) .build(); - preTransformOperatorHarness = new EventOperatorTestHarness<>(preTransformOperator, 1); + preTransformOperatorHarness = + RegularEventOperatorTestHarness.with(preTransformOperator, 1); preTransformOperatorHarness.open(); postTransformOperator = @@ -249,7 +252,8 @@ private UnifiedTransformTestCase initializeHarness() throws Exception { .addTransform( tableId.identifier(), projectionExpression, filterExpression) .build(); - postTransformOperatorHarness = new EventOperatorTestHarness<>(postTransformOperator, 1); + postTransformOperatorHarness = + RegularEventOperatorTestHarness.with(postTransformOperator, 1); postTransformOperatorHarness.open(); return this; } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperatorTest.java index 413e180edba..a2d13381224 100644 --- 
a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/partitioning/PrePartitionOperatorTest.java @@ -26,7 +26,7 @@ import org.apache.flink.cdc.common.sink.DefaultDataChangeEventHashFunctionProvider; import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.RowType; -import org.apache.flink.cdc.runtime.testutils.operators.EventOperatorTestHarness; +import org.apache.flink.cdc.runtime.testutils.operators.RegularEventOperatorTestHarness; import org.apache.flink.cdc.runtime.testutils.schema.TestingSchemaRegistryGateway; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; @@ -35,7 +35,7 @@ import static org.assertj.core.api.Assertions.assertThat; -/** Unit test for {@link PrePartitionOperator}. */ +/** Unit test for {@link RegularPrePartitionOperator}. */ class PrePartitionOperatorTest { private static final TableId CUSTOMERS = TableId.tableId("my_company", "my_branch", "customers"); @@ -52,54 +52,56 @@ class PrePartitionOperatorTest { @Test void testBroadcastingSchemaChangeEvent() throws Exception { - try (EventOperatorTestHarness testHarness = - createTestHarness()) { + try (RegularEventOperatorTestHarness + testHarness = createTestHarness()) { // Initialization testHarness.open(); testHarness.registerTableSchema(CUSTOMERS, CUSTOMERS_SCHEMA, TEST_NONCE); // CreateTableEvent - PrePartitionOperator operator = testHarness.getOperator(); + RegularPrePartitionOperator operator = testHarness.getOperator(); CreateTableEvent createTableEvent = new CreateTableEvent(CUSTOMERS, CUSTOMERS_SCHEMA); operator.processElement(new StreamRecord<>(createTableEvent)); assertThat(testHarness.getOutputRecords()).hasSize(DOWNSTREAM_PARALLELISM); for (int i = 0; i < DOWNSTREAM_PARALLELISM; i++) { assertThat(testHarness.getOutputRecords().poll()) - .isEqualTo(new StreamRecord<>(new PartitioningEvent(createTableEvent, i))); + .isEqualTo( + new StreamRecord<>( + PartitioningEvent.ofRegular(createTableEvent, i))); } } } @Test void testBroadcastingFlushEvent() throws Exception { - try (EventOperatorTestHarness testHarness = - createTestHarness()) { + try (RegularEventOperatorTestHarness + testHarness = createTestHarness()) { // Initialization testHarness.open(); testHarness.registerTableSchema(CUSTOMERS, CUSTOMERS_SCHEMA, TEST_NONCE); // FlushEvent - PrePartitionOperator operator = testHarness.getOperator(); - FlushEvent flushEvent = new FlushEvent(CUSTOMERS, TEST_NONCE); + RegularPrePartitionOperator operator = testHarness.getOperator(); + FlushEvent flushEvent = FlushEvent.of(CUSTOMERS, TEST_NONCE); operator.processElement(new StreamRecord<>(flushEvent)); assertThat(testHarness.getOutputRecords()).hasSize(DOWNSTREAM_PARALLELISM); for (int i = 0; i < DOWNSTREAM_PARALLELISM; i++) { assertThat(testHarness.getOutputRecords().poll()) - .isEqualTo(new StreamRecord<>(new PartitioningEvent(flushEvent, i))); + .isEqualTo(new StreamRecord<>(PartitioningEvent.ofRegular(flushEvent, i))); } } } @Test void testPartitioningDataChangeEvent() throws Exception { - try (EventOperatorTestHarness testHarness = - createTestHarness()) { + try (RegularEventOperatorTestHarness + testHarness = createTestHarness()) { // Initialization testHarness.open(); testHarness.registerTableSchema(CUSTOMERS, CUSTOMERS_SCHEMA, 0L); // DataChangeEvent - PrePartitionOperator operator = testHarness.getOperator(); + 
RegularPrePartitionOperator operator = testHarness.getOperator(); BinaryRecordDataGenerator recordDataGenerator = new BinaryRecordDataGenerator(((RowType) CUSTOMERS_SCHEMA.toRowDataType())); DataChangeEvent eventA = @@ -117,13 +119,13 @@ void testPartitioningDataChangeEvent() throws Exception { assertThat(testHarness.getOutputRecords().poll()) .isEqualTo( new StreamRecord<>( - new PartitioningEvent( + PartitioningEvent.ofRegular( eventA, getPartitioningTarget(CUSTOMERS_SCHEMA, eventA)))); assertThat(testHarness.getOutputRecords().poll()) .isEqualTo( new StreamRecord<>( - new PartitioningEvent( + PartitioningEvent.ofRegular( eventB, getPartitioningTarget(CUSTOMERS_SCHEMA, eventB)))); } @@ -136,12 +138,13 @@ private int getPartitioningTarget(Schema schema, DataChangeEvent dataChangeEvent % DOWNSTREAM_PARALLELISM; } - private EventOperatorTestHarness createTestHarness() { - PrePartitionOperator operator = - new PrePartitionOperator( + private RegularEventOperatorTestHarness + createTestHarness() { + RegularPrePartitionOperator operator = + new RegularPrePartitionOperator( TestingSchemaRegistryGateway.SCHEMA_OPERATOR_ID, DOWNSTREAM_PARALLELISM, new DefaultDataChangeEventHashFunctionProvider()); - return new EventOperatorTestHarness<>(operator, DOWNSTREAM_PARALLELISM); + return RegularEventOperatorTestHarness.with(operator, DOWNSTREAM_PARALLELISM); } } diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializerTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializerTest.java index eadb12d5d54..4d33dc5603e 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializerTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/EventSerializerTest.java @@ -47,9 +47,9 @@ protected Class getTypeClass() { protected Event[] getTestData() { Event[] flushEvents = new Event[] { - new FlushEvent(TableId.tableId("table"), 1L), - new FlushEvent(TableId.tableId("schema", "table"), 2L), - new FlushEvent(TableId.tableId("namespace", "schema", "table"), 3L) + FlushEvent.of(TableId.tableId("table"), 1L), + FlushEvent.of(TableId.tableId("schema", "table"), 2L), + FlushEvent.of(TableId.tableId("namespace", "schema", "table"), 3L) }; Event[] dataChangeEvents = new DataChangeEventSerializerTest().getTestData(); Event[] schemaChangeEvents = new SchemaChangeEventSerializerTest().getTestData(); diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializerTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializerTest.java index 6b8a77a12ea..13e1f98e1bb 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializerTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/serializer/event/PartitioningEventSerializerTest.java @@ -51,9 +51,9 @@ protected Class getTypeClass() { protected PartitioningEvent[] getTestData() { Event[] flushEvents = new Event[] { - new FlushEvent(TableId.tableId("table"), 1L), - new FlushEvent(TableId.tableId("schema", "table"), 2L), - new FlushEvent(TableId.tableId("namespace", "schema", "table"), 3L) + FlushEvent.of(TableId.tableId("table"), 1L), + FlushEvent.of(TableId.tableId("schema", "table"), 2L), + FlushEvent.of(TableId.tableId("namespace", "schema", "table"), 3L) }; Event[] dataChangeEvents = new 
DataChangeEventSerializerTest().getTestData(); Event[] schemaChangeEvents = new SchemaChangeEventSerializerTest().getTestData(); @@ -62,15 +62,15 @@ protected PartitioningEvent[] getTestData() { partitioningEvents.addAll( Arrays.stream(flushEvents) - .map(event -> new PartitioningEvent(event, 1)) + .map(event -> PartitioningEvent.ofRegular(event, 1)) .collect(Collectors.toList())); partitioningEvents.addAll( Arrays.stream(dataChangeEvents) - .map(event -> new PartitioningEvent(event, 2)) + .map(event -> PartitioningEvent.ofRegular(event, 2)) .collect(Collectors.toList())); partitioningEvents.addAll( Arrays.stream(schemaChangeEvents) - .map(event -> new PartitioningEvent(event, 3)) + .map(event -> PartitioningEvent.ofRegular(event, 3)) .collect(Collectors.toList())); return partitioningEvents.toArray(new PartitioningEvent[0]); diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/DistributedEventOperatorTestHarness.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/DistributedEventOperatorTestHarness.java new file mode 100644 index 00000000000..fcbf181d109 --- /dev/null +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/DistributedEventOperatorTestHarness.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.runtime.testutils.operators; + +import org.apache.flink.cdc.common.event.Event; +import org.apache.flink.cdc.common.event.FlushEvent; +import org.apache.flink.cdc.common.event.TableId; +import org.apache.flink.cdc.common.schema.Schema; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.FlushSuccessEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.schema.distributed.SchemaCoordinator; +import org.apache.flink.cdc.runtime.operators.sink.SchemaEvolutionClient; +import org.apache.flink.cdc.runtime.testutils.schema.CollectingMetadataApplier; +import org.apache.flink.cdc.runtime.testutils.schema.TestingSchemaRegistryGateway; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.jobgraph.OperatorID; +import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; +import org.apache.flink.runtime.operators.testutils.DummyEnvironment; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.StreamTask; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; +import org.apache.flink.streaming.util.MockStreamConfig; +import org.apache.flink.util.OutputTag; +import org.apache.flink.util.SerializedValue; + +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.concurrent.Executors; + +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.unwrap; + +/** + * Harness for testing customized operators handling {@link Event}s in CDC pipeline. + * + *
<p>
In addition to regular operator context and lifecycle management, this test harness also wraps + * {@link TestingSchemaRegistryGateway} into the context of tested operator, in order to support + * testing operators that have interaction with {@link SchemaCoordinator} via {@link + * SchemaEvolutionClient}. + * + * @param Type of the operator + * @param Type of the event emitted by the operator + */ +public class DistributedEventOperatorTestHarness< + OP extends AbstractStreamOperator, E extends Event> + implements AutoCloseable { + public static final OperatorID SCHEMA_OPERATOR_ID = new OperatorID(15213L, 15513L); + + public static final OperatorID SINK_OPERATOR_ID = new OperatorID(15214L, 15514L); + + private final OP operator; + private final int numOutputs; + private final SchemaCoordinator schemaCoordinator; + private final TestingSchemaRegistryGateway schemaRegistryGateway; + private final LinkedList> outputRecords = new LinkedList<>(); + private final MockedOperatorCoordinatorContext mockedContext; + + public DistributedEventOperatorTestHarness(OP operator, int numOutputs) { + this(operator, numOutputs, Duration.ofSeconds(3), Duration.ofMinutes(3)); + } + + public DistributedEventOperatorTestHarness( + OP operator, int numOutputs, Duration applyDuration, Duration rpcTimeout) { + this.operator = operator; + this.numOutputs = numOutputs; + this.mockedContext = + new MockedOperatorCoordinatorContext( + SCHEMA_OPERATOR_ID, Thread.currentThread().getContextClassLoader()); + this.schemaCoordinator = + new SchemaCoordinator( + "SchemaRegistry", + mockedContext, + Executors.newFixedThreadPool(1), + new CollectingMetadataApplier(applyDuration), + new ArrayList<>(), + rpcTimeout); + this.schemaRegistryGateway = new TestingSchemaRegistryGateway(schemaCoordinator); + } + + public void open() throws Exception { + schemaCoordinator.start(); + initializeOperator(); + operator.open(); + } + + public LinkedList> getOutputRecords() { + return outputRecords; + } + + public void clearOutputRecords() { + outputRecords.clear(); + } + + public OP getOperator() { + return operator; + } + + public void registerTableSchema(TableId tableId, Schema schema) { + schemaCoordinator.emplaceOriginalSchema(tableId, 0, schema); + schemaCoordinator.emplaceEvolvedSchema(tableId, schema); + } + + public Schema getLatestEvolvedSchema(TableId tableId) throws Exception { + return ((GetEvolvedSchemaResponse) + unwrap( + schemaCoordinator + .handleCoordinationRequest( + new GetEvolvedSchemaRequest( + tableId, + GetEvolvedSchemaRequest + .LATEST_SCHEMA_VERSION)) + .get())) + .getSchema() + .orElse(null); + } + + public boolean isJobFailed() { + return mockedContext.isJobFailed(); + } + + public Throwable getJobFailureCause() { + return mockedContext.getFailureCause(); + } + + @Override + public void close() throws Exception { + operator.close(); + } + + // -------------------------------------- Helper functions ------------------------------- + + private void initializeOperator() throws Exception { + operator.setup( + new MockStreamTask(schemaRegistryGateway), + new MockStreamConfig(new Configuration(), numOutputs), + new EventCollectingOutput<>(outputRecords, schemaRegistryGateway)); + schemaRegistryGateway.sendOperatorEventToCoordinator( + SINK_OPERATOR_ID, new SerializedValue<>(new SinkWriterRegisterEvent(0))); + } + + // ---------------------------------------- Helper classes --------------------------------- + + private static class EventCollectingOutput implements Output> { + private final LinkedList> outputRecords; + + 
private final TestingSchemaRegistryGateway schemaRegistryGateway; + + public EventCollectingOutput( + LinkedList> outputRecords, + TestingSchemaRegistryGateway schemaRegistryGateway) { + this.outputRecords = outputRecords; + this.schemaRegistryGateway = schemaRegistryGateway; + } + + @Override + public void collect(StreamRecord record) { + outputRecords.add(record); + Event event = record.getValue(); + if (event instanceof FlushEvent) { + try { + schemaRegistryGateway.sendOperatorEventToCoordinator( + SINK_OPERATOR_ID, new SerializedValue<>(FlushSuccessEvent.ofAll(0))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void emitWatermark(Watermark mark) { + throw new UnsupportedOperationException(); + } + + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + throw new UnsupportedOperationException(); + } + + @Override + public void collect(OutputTag outputTag, StreamRecord record) { + throw new UnsupportedOperationException(); + } + + @Override + public void emitLatencyMarker(LatencyMarker latencyMarker) { + throw new UnsupportedOperationException(); + } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) {} + + @Override + public void close() {} + } + + private static class MockStreamTask extends StreamTask> { + protected MockStreamTask(TestingSchemaRegistryGateway schemaRegistryGateway) + throws Exception { + super(new SchemaRegistryCoordinatingEnvironment(schemaRegistryGateway)); + } + + @Override + protected void init() {} + } + + private static class SchemaRegistryCoordinatingEnvironment extends DummyEnvironment { + private final TestingSchemaRegistryGateway schemaRegistryGateway; + + public SchemaRegistryCoordinatingEnvironment( + TestingSchemaRegistryGateway schemaRegistryGateway) { + this.schemaRegistryGateway = schemaRegistryGateway; + } + + @Override + public TaskOperatorEventGateway getOperatorCoordinatorEventGateway() { + return schemaRegistryGateway; + } + } +} diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/EventOperatorTestHarness.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/RegularEventOperatorTestHarness.java similarity index 67% rename from flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/EventOperatorTestHarness.java rename to flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/RegularEventOperatorTestHarness.java index c272667c979..35e5f1c2a5e 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/EventOperatorTestHarness.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/operators/RegularEventOperatorTestHarness.java @@ -17,21 +17,20 @@ package org.apache.flink.cdc.runtime.testutils.operators; -import org.apache.flink.cdc.common.event.CreateTableEvent; import org.apache.flink.cdc.common.event.Event; import org.apache.flink.cdc.common.event.FlushEvent; import org.apache.flink.cdc.common.event.SchemaChangeEventType; +import org.apache.flink.cdc.common.event.SchemaChangeEventTypeFamily; import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.pipeline.SchemaChangeBehavior; import org.apache.flink.cdc.common.schema.Schema; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; -import org.apache.flink.cdc.runtime.operators.schema.event.FlushSuccessEvent; -import 
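Reviewer note, not part of the patch: a minimal usage sketch for the new DistributedEventOperatorTestHarness. DistributedSchemaMapper is a placeholder name for whatever AbstractStreamOperator<Event> subclass is under test, and TABLE_ID / SCHEMA are assumed test constants:

    // Sketch only; DistributedSchemaMapper is hypothetical, not a class introduced by this patch.
    DistributedSchemaMapper operator = new DistributedSchemaMapper();
    try (DistributedEventOperatorTestHarness<DistributedSchemaMapper, Event> harness =
            new DistributedEventOperatorTestHarness<>(operator, 4)) {
        harness.open();                                  // starts the distributed SchemaCoordinator
        harness.registerTableSchema(TABLE_ID, SCHEMA);   // no nonce argument, unlike the regular harness
        Assertions.assertThat(harness.getLatestEvolvedSchema(TABLE_ID)).isNotNull();
    }
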
org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetEvolvedSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.GetOriginalSchemaResponse; -import org.apache.flink.cdc.runtime.operators.schema.event.SchemaChangeRequest; -import org.apache.flink.cdc.runtime.operators.schema.event.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.FlushSuccessEvent; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetEvolvedSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaRequest; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.GetOriginalSchemaResponse; +import org.apache.flink.cdc.runtime.operators.schema.common.event.common.SinkWriterRegisterEvent; +import org.apache.flink.cdc.runtime.operators.schema.regular.SchemaCoordinator; import org.apache.flink.cdc.runtime.operators.sink.SchemaEvolutionClient; import org.apache.flink.cdc.runtime.testutils.schema.CollectingMetadataApplier; import org.apache.flink.cdc.runtime.testutils.schema.TestingSchemaRegistryGateway; @@ -54,24 +53,27 @@ import java.io.IOException; import java.time.Duration; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.LinkedList; import java.util.Set; import java.util.concurrent.Executors; +import java.util.stream.Collectors; -import static org.apache.flink.cdc.runtime.operators.schema.event.CoordinationResponseUtils.unwrap; +import static org.apache.flink.cdc.runtime.operators.schema.common.event.common.CoordinationResponseUtils.unwrap; /** * Harness for testing customized operators handling {@link Event}s in CDC pipeline. * *
<p>
In addition to regular operator context and lifecycle management, this test harness also wraps * {@link TestingSchemaRegistryGateway} into the context of tested operator, in order to support - * testing operators that have interaction with {@link SchemaRegistry} via {@link + * testing operators that have interaction with {@link SchemaCoordinator} via {@link * SchemaEvolutionClient}. * * @param Type of the operator * @param Type of the event emitted by the operator */ -public class EventOperatorTestHarness, E extends Event> +public class RegularEventOperatorTestHarness, E extends Event> implements AutoCloseable { public static final OperatorID SCHEMA_OPERATOR_ID = new OperatorID(15213L, 15513L); @@ -79,63 +81,16 @@ public class EventOperatorTestHarness, E ex private final OP operator; private final int numOutputs; - private final SchemaRegistry schemaRegistry; + private final SchemaCoordinator schemaRegistry; private final TestingSchemaRegistryGateway schemaRegistryGateway; private final LinkedList> outputRecords = new LinkedList<>(); private final MockedOperatorCoordinatorContext mockedContext; - public EventOperatorTestHarness(OP operator, int numOutputs) { - this(operator, numOutputs, null, SchemaChangeBehavior.EVOLVE); - } - - public EventOperatorTestHarness(OP operator, int numOutputs, Duration duration) { - this(operator, numOutputs, duration, SchemaChangeBehavior.EVOLVE); - } - - public EventOperatorTestHarness( - OP operator, int numOutputs, Duration duration, SchemaChangeBehavior behavior) { - this.operator = operator; - this.numOutputs = numOutputs; - this.mockedContext = - new MockedOperatorCoordinatorContext( - SCHEMA_OPERATOR_ID, Thread.currentThread().getContextClassLoader()); - schemaRegistry = - new SchemaRegistry( - "SchemaOperator", - mockedContext, - Executors.newFixedThreadPool(1), - new CollectingMetadataApplier(duration), - new ArrayList<>(), - behavior); - schemaRegistryGateway = new TestingSchemaRegistryGateway(schemaRegistry); - } - - public EventOperatorTestHarness( + private RegularEventOperatorTestHarness( OP operator, int numOutputs, - Duration duration, - SchemaChangeBehavior behavior, - Set enabledEventTypes) { - this.operator = operator; - this.numOutputs = numOutputs; - this.mockedContext = - new MockedOperatorCoordinatorContext( - SCHEMA_OPERATOR_ID, Thread.currentThread().getContextClassLoader()); - schemaRegistry = - new SchemaRegistry( - "SchemaOperator", - mockedContext, - Executors.newFixedThreadPool(1), - new CollectingMetadataApplier(duration, enabledEventTypes), - new ArrayList<>(), - behavior); - schemaRegistryGateway = new TestingSchemaRegistryGateway(schemaRegistry); - } - - public EventOperatorTestHarness( - OP operator, - int numOutputs, - Duration duration, + Duration schemaEvolveDuration, + Duration rpcTimeout, SchemaChangeBehavior behavior, Set enabledEventTypes, Set errorsOnEventTypes) { @@ -145,18 +100,97 @@ public EventOperatorTestHarness( new MockedOperatorCoordinatorContext( SCHEMA_OPERATOR_ID, Thread.currentThread().getContextClassLoader()); schemaRegistry = - new SchemaRegistry( + new SchemaCoordinator( "SchemaOperator", mockedContext, Executors.newFixedThreadPool(1), new CollectingMetadataApplier( - duration, enabledEventTypes, errorsOnEventTypes), + schemaEvolveDuration, enabledEventTypes, errorsOnEventTypes), new ArrayList<>(), - behavior); + behavior, + rpcTimeout); schemaRegistryGateway = new TestingSchemaRegistryGateway(schemaRegistry); } + public static , E extends Event> + RegularEventOperatorTestHarness with(OP operator, int 
numOutputs) { + return new RegularEventOperatorTestHarness<>( + operator, + numOutputs, + null, + null, + SchemaChangeBehavior.EVOLVE, + Arrays.stream(SchemaChangeEventTypeFamily.ALL).collect(Collectors.toSet()), + Collections.emptySet()); + } + + public static , E extends Event> + RegularEventOperatorTestHarness withDuration( + OP operator, int numOutputs, Duration evolveDuration) { + return new RegularEventOperatorTestHarness<>( + operator, + numOutputs, + evolveDuration, + null, + SchemaChangeBehavior.EVOLVE, + Arrays.stream(SchemaChangeEventTypeFamily.ALL).collect(Collectors.toSet()), + Collections.emptySet()); + } + + public static , E extends Event> + RegularEventOperatorTestHarness withDurationAndBehavior( + OP operator, + int numOutputs, + Duration evolveDuration, + SchemaChangeBehavior behavior) { + return new RegularEventOperatorTestHarness<>( + operator, + numOutputs, + evolveDuration, + null, + behavior, + Arrays.stream(SchemaChangeEventTypeFamily.ALL).collect(Collectors.toSet()), + Collections.emptySet()); + } + + public static , E extends Event> + RegularEventOperatorTestHarness withDurationAndFineGrainedBehavior( + OP operator, + int numOutputs, + Duration evolveDuration, + SchemaChangeBehavior behavior, + Set enabledEventTypes) { + return new RegularEventOperatorTestHarness<>( + operator, + numOutputs, + evolveDuration, + null, + behavior, + enabledEventTypes, + Collections.emptySet()); + } + + public static , E extends Event> + RegularEventOperatorTestHarness withDurationAndFineGrainedBehaviorWithError( + OP operator, + int numOutputs, + Duration evolveDuration, + SchemaChangeBehavior behavior, + Set enabledEventTypes, + Set errorOnEventTypes) { + + return new RegularEventOperatorTestHarness<>( + operator, + numOutputs, + evolveDuration, + null, + behavior, + enabledEventTypes, + errorOnEventTypes); + } + public void open() throws Exception { + schemaRegistry.start(); initializeOperator(); operator.open(); } @@ -174,9 +208,8 @@ public OP getOperator() { } public void registerTableSchema(TableId tableId, Schema schema, long nonce) { - schemaRegistry.handleCoordinationRequest( - new SchemaChangeRequest(tableId, new CreateTableEvent(tableId, schema), 0, nonce)); - schemaRegistry.handleApplyEvolvedSchemaChangeRequest(new CreateTableEvent(tableId, schema)); + schemaRegistry.emplaceOriginalSchema(tableId, schema); + schemaRegistry.emplaceEvolvedSchema(tableId, schema); } public Schema getLatestOriginalSchema(TableId tableId) throws Exception { @@ -254,7 +287,7 @@ public void collect(StreamRecord record) { schemaRegistryGateway.sendOperatorEventToCoordinator( SINK_OPERATOR_ID, new SerializedValue<>( - new FlushSuccessEvent( + FlushSuccessEvent.of( 0, ((FlushEvent) event).getTableId(), ((FlushEvent) event).getNonce()))); diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/schema/TestingSchemaRegistryGateway.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/schema/TestingSchemaRegistryGateway.java index 1859d0e4469..7a239a1d6f7 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/schema/TestingSchemaRegistryGateway.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/testutils/schema/TestingSchemaRegistryGateway.java @@ -17,7 +17,7 @@ package org.apache.flink.cdc.runtime.testutils.schema; -import org.apache.flink.cdc.runtime.operators.schema.coordinator.SchemaRegistry; +import org.apache.flink.cdc.runtime.operators.schema.common.SchemaRegistry; import 
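Reviewer note, not part of the patch: the old telescoping constructors are replaced by the named factories above. A sketch of picking the fine-grained variant, assuming a PostTransformOperator named transform built as in the earlier tests:

    // Sketch only -- the plain with(operator, numOutputs) overload covers the common case.
    RegularEventOperatorTestHarness<PostTransformOperator, Event> harness =
            RegularEventOperatorTestHarness.withDurationAndFineGrainedBehavior(
                    transform,
                    1,
                    Duration.ofSeconds(3),                  // simulated metadata-apply latency
                    SchemaChangeBehavior.EVOLVE,
                    Collections.singleton(SchemaChangeEventType.ADD_COLUMN));
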
org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway; import org.apache.flink.runtime.operators.coordination.CoordinationRequest; diff --git a/tools/mig-test/datastream/compile_jobs.rb b/tools/mig-test/datastream/compile_jobs.rb index 68a5d8e824c..d944ac91fd0 100644 --- a/tools/mig-test/datastream/compile_jobs.rb +++ b/tools/mig-test/datastream/compile_jobs.rb @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -JOB_VERSIONS = %w[2.4.2 3.0.0 3.0.1 3.1.0 3.1.1 3.3-SNAPSHOT] +JOB_VERSIONS = %w[3.2.0 3.2.1 3.3-SNAPSHOT] JOB_VERSIONS.each do |version| puts "Compiling DataStream job for CDC #{version}" diff --git a/tools/mig-test/datastream/datastream-2.4.2/pom.xml b/tools/mig-test/datastream/datastream-2.4.2/pom.xml deleted file mode 100644 index fd3fbe0cad8..00000000000 --- a/tools/mig-test/datastream/datastream-2.4.2/pom.xml +++ /dev/null @@ -1,151 +0,0 @@ - - - - 4.0.0 - - org.apache.flink - datastream-job - 2.4.2 - jar - - - UTF-8 - 1.17.1 - 2.4.2 - 1.9.7.Final - 2.12 - 2.0.13 - 8 - 8 - UTF-8 - - - - - org.apache.flink - flink-streaming-java - ${flink.version} - - - org.apache.flink - flink-runtime - ${flink.version} - - - org.apache.flink - flink-java - ${flink.version} - - - org.apache.flink - flink-connector-base - ${flink.version} - - - org.apache.flink - flink-clients - ${flink.version} - - - org.apache.flink - flink-table-planner_${scala.binary.version} - ${flink.version} - - - org.apache.flink - flink-table-runtime - ${flink.version} - - - org.apache.flink - flink-core - ${flink.version} - - - org.apache.flink - flink-table-common - ${flink.version} - - - - - - - - - - org.apache.flink - flink-shaded-guava - 30.1.1-jre-16.1 - - - com.ververica - flink-connector-debezium - ${flink.cdc.version} - - - com.ververica - flink-cdc-base - ${flink.cdc.version} - - - com.ververica - flink-connector-mysql-cdc - ${flink.cdc.version} - - - io.debezium - debezium-connector-mysql - ${debezium.version} - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-simple - ${slf4j.version} - - - - - - - - maven-assembly-plugin - - - package - - single - - - - - - jar-with-dependencies - - - - - - \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-2.4.2/src/main/java/DataStreamJob.java b/tools/mig-test/datastream/datastream-2.4.2/src/main/java/DataStreamJob.java deleted file mode 100644 index 4e2a7a901cd..00000000000 --- a/tools/mig-test/datastream/datastream-2.4.2/src/main/java/DataStreamJob.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import com.ververica.cdc.connectors.mysql.source.MySqlSource; -import com.ververica.cdc.connectors.mysql.table.StartupOptions; -import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; - -public class DataStreamJob { - - public static void main(String[] args) { - MySqlSource mySqlSource = MySqlSource.builder() - .hostname("localhost") - .port(3306) - .databaseList("fallen") - .tableList("fallen.angel", "fallen.gabriel", "fallen.girl") - .startupOptions(StartupOptions.initial()) - .username("root") - .password("") - .deserializer(new JsonDebeziumDeserializationSchema()) - .serverTimeZone("UTC") - .build(); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(3000); - - env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MySQL CDC Source") - .uid("sql-source-uid") - .setParallelism(1) - .print() - .setParallelism(1); - - try { - env.execute(); - } catch (Exception e) { - // ... unfortunately - } - } -} diff --git a/tools/mig-test/datastream/datastream-3.0.0/pom.xml b/tools/mig-test/datastream/datastream-3.0.0/pom.xml deleted file mode 100644 index 5d8711a814e..00000000000 --- a/tools/mig-test/datastream/datastream-3.0.0/pom.xml +++ /dev/null @@ -1,151 +0,0 @@ - - - - 4.0.0 - - org.apache.flink - datastream-job - 3.0.0 - jar - - - UTF-8 - 1.18.1 - 3.0.0 - 1.9.7.Final - 2.12 - 2.0.13 - 8 - 8 - UTF-8 - - - - - org.apache.flink - flink-streaming-java - ${flink.version} - - - org.apache.flink - flink-runtime - ${flink.version} - - - org.apache.flink - flink-java - ${flink.version} - - - org.apache.flink - flink-connector-base - ${flink.version} - - - org.apache.flink - flink-clients - ${flink.version} - - - org.apache.flink - flink-table-planner_${scala.binary.version} - ${flink.version} - - - org.apache.flink - flink-table-runtime - ${flink.version} - - - org.apache.flink - flink-core - ${flink.version} - - - org.apache.flink - flink-table-common - ${flink.version} - - - - - - - - - - org.apache.flink - flink-shaded-guava - 31.1-jre-17.0 - - - com.ververica - flink-connector-debezium - ${flink.cdc.version} - - - com.ververica - flink-cdc-base - ${flink.cdc.version} - - - com.ververica - flink-connector-mysql-cdc - ${flink.cdc.version} - - - io.debezium - debezium-connector-mysql - ${debezium.version} - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-simple - ${slf4j.version} - - - - - - - - maven-assembly-plugin - - - package - - single - - - - - - jar-with-dependencies - - - - - - \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-3.0.0/src/main/java/DataStreamJob.java b/tools/mig-test/datastream/datastream-3.0.0/src/main/java/DataStreamJob.java deleted file mode 100644 index 4e2a7a901cd..00000000000 --- a/tools/mig-test/datastream/datastream-3.0.0/src/main/java/DataStreamJob.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import com.ververica.cdc.connectors.mysql.source.MySqlSource; -import com.ververica.cdc.connectors.mysql.table.StartupOptions; -import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; - -public class DataStreamJob { - - public static void main(String[] args) { - MySqlSource mySqlSource = MySqlSource.builder() - .hostname("localhost") - .port(3306) - .databaseList("fallen") - .tableList("fallen.angel", "fallen.gabriel", "fallen.girl") - .startupOptions(StartupOptions.initial()) - .username("root") - .password("") - .deserializer(new JsonDebeziumDeserializationSchema()) - .serverTimeZone("UTC") - .build(); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(3000); - - env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MySQL CDC Source") - .uid("sql-source-uid") - .setParallelism(1) - .print() - .setParallelism(1); - - try { - env.execute(); - } catch (Exception e) { - // ... unfortunately - } - } -} diff --git a/tools/mig-test/datastream/datastream-3.0.1/.gitignore b/tools/mig-test/datastream/datastream-3.0.1/.gitignore deleted file mode 100644 index 5ff6309b719..00000000000 --- a/tools/mig-test/datastream/datastream-3.0.1/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -target/ -!.mvn/wrapper/maven-wrapper.jar -!**/src/main/**/target/ -!**/src/test/**/target/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ -build/ -!**/src/main/**/build/ -!**/src/test/**/build/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-3.0.1/pom.xml b/tools/mig-test/datastream/datastream-3.0.1/pom.xml deleted file mode 100644 index 4044fd661a2..00000000000 --- a/tools/mig-test/datastream/datastream-3.0.1/pom.xml +++ /dev/null @@ -1,151 +0,0 @@ - - - - 4.0.0 - - org.apache.flink - datastream-job - 3.0.1 - jar - - - UTF-8 - 1.18.1 - 3.0.1 - 1.9.7.Final - 2.12 - 2.0.13 - 8 - 8 - UTF-8 - - - - - org.apache.flink - flink-streaming-java - ${flink.version} - - - org.apache.flink - flink-runtime - ${flink.version} - - - org.apache.flink - flink-java - ${flink.version} - - - org.apache.flink - flink-connector-base - ${flink.version} - - - org.apache.flink - flink-clients - ${flink.version} - - - org.apache.flink - flink-table-planner_${scala.binary.version} - ${flink.version} - - - org.apache.flink - flink-table-runtime - ${flink.version} - - - org.apache.flink - flink-core - ${flink.version} - - - org.apache.flink - flink-table-common - ${flink.version} - - - - - - - - - - org.apache.flink - flink-shaded-guava - 31.1-jre-17.0 - - - com.ververica - flink-connector-debezium - ${flink.cdc.version} - - - com.ververica - 
flink-cdc-base - ${flink.cdc.version} - - - com.ververica - flink-connector-mysql-cdc - ${flink.cdc.version} - - - io.debezium - debezium-connector-mysql - ${debezium.version} - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-simple - ${slf4j.version} - - - - - - - - maven-assembly-plugin - - - package - - single - - - - - - jar-with-dependencies - - - - - - \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-3.0.1/src/main/java/DataStreamJob.java b/tools/mig-test/datastream/datastream-3.0.1/src/main/java/DataStreamJob.java deleted file mode 100644 index 4e2a7a901cd..00000000000 --- a/tools/mig-test/datastream/datastream-3.0.1/src/main/java/DataStreamJob.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import com.ververica.cdc.connectors.mysql.source.MySqlSource; -import com.ververica.cdc.connectors.mysql.table.StartupOptions; -import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; - -public class DataStreamJob { - - public static void main(String[] args) { - MySqlSource mySqlSource = MySqlSource.builder() - .hostname("localhost") - .port(3306) - .databaseList("fallen") - .tableList("fallen.angel", "fallen.gabriel", "fallen.girl") - .startupOptions(StartupOptions.initial()) - .username("root") - .password("") - .deserializer(new JsonDebeziumDeserializationSchema()) - .serverTimeZone("UTC") - .build(); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(3000); - - env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MySQL CDC Source") - .uid("sql-source-uid") - .setParallelism(1) - .print() - .setParallelism(1); - - try { - env.execute(); - } catch (Exception e) { - // ... 
unfortunately - } - } -} diff --git a/tools/mig-test/datastream/datastream-3.1.0/.gitignore b/tools/mig-test/datastream/datastream-3.1.0/.gitignore deleted file mode 100644 index 5ff6309b719..00000000000 --- a/tools/mig-test/datastream/datastream-3.1.0/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -target/ -!.mvn/wrapper/maven-wrapper.jar -!**/src/main/**/target/ -!**/src/test/**/target/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ -build/ -!**/src/main/**/build/ -!**/src/test/**/build/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-3.1.1/.gitignore b/tools/mig-test/datastream/datastream-3.1.1/.gitignore deleted file mode 100644 index 5ff6309b719..00000000000 --- a/tools/mig-test/datastream/datastream-3.1.1/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -target/ -!.mvn/wrapper/maven-wrapper.jar -!**/src/main/**/target/ -!**/src/test/**/target/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ -build/ -!**/src/main/**/build/ -!**/src/test/**/build/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/tools/mig-test/datastream/datastream-2.4.2/.gitignore b/tools/mig-test/datastream/datastream-3.2.0/.gitignore similarity index 100% rename from tools/mig-test/datastream/datastream-2.4.2/.gitignore rename to tools/mig-test/datastream/datastream-3.2.0/.gitignore diff --git a/tools/mig-test/datastream/datastream-3.1.1/pom.xml b/tools/mig-test/datastream/datastream-3.2.0/pom.xml similarity index 98% rename from tools/mig-test/datastream/datastream-3.1.1/pom.xml rename to tools/mig-test/datastream/datastream-3.2.0/pom.xml index d8f6f88d9d1..ab112fcc676 100644 --- a/tools/mig-test/datastream/datastream-3.1.1/pom.xml +++ b/tools/mig-test/datastream/datastream-3.2.0/pom.xml @@ -22,13 +22,13 @@ limitations under the License. 
     org.apache.flink
     datastream-job
-    3.1.1
+    3.2.0
     jar
     UTF-8
     1.18.1
-    3.1.1
+    3.2.0
     1.9.7.Final
     2.12
     2.0.13
diff --git a/tools/mig-test/datastream/datastream-3.1.0/src/main/java/DataStreamJob.java b/tools/mig-test/datastream/datastream-3.2.0/src/main/java/DataStreamJob.java
similarity index 100%
rename from tools/mig-test/datastream/datastream-3.1.0/src/main/java/DataStreamJob.java
rename to tools/mig-test/datastream/datastream-3.2.0/src/main/java/DataStreamJob.java
diff --git a/tools/mig-test/datastream/datastream-3.0.0/.gitignore b/tools/mig-test/datastream/datastream-3.2.1/.gitignore
similarity index 100%
rename from tools/mig-test/datastream/datastream-3.0.0/.gitignore
rename to tools/mig-test/datastream/datastream-3.2.1/.gitignore
diff --git a/tools/mig-test/datastream/datastream-3.1.0/pom.xml b/tools/mig-test/datastream/datastream-3.2.1/pom.xml
similarity index 97%
rename from tools/mig-test/datastream/datastream-3.1.0/pom.xml
rename to tools/mig-test/datastream/datastream-3.2.1/pom.xml
index 6c927b99944..c5e7a798b5e 100644
--- a/tools/mig-test/datastream/datastream-3.1.0/pom.xml
+++ b/tools/mig-test/datastream/datastream-3.2.1/pom.xml
@@ -22,14 +22,14 @@ limitations under the License.
     org.apache.flink
     datastream-job
-    3.1.0
+    3.2.1
     jar
     UTF-8
-    1.18.1
-    3.1.0
-    1.9.7.Final
+    1.19.1
+    3.2.1
+    1.9.8.Final
     2.12
     2.0.13
     8
diff --git a/tools/mig-test/datastream/datastream-3.1.1/src/main/java/DataStreamJob.java b/tools/mig-test/datastream/datastream-3.2.1/src/main/java/DataStreamJob.java
similarity index 100%
rename from tools/mig-test/datastream/datastream-3.1.1/src/main/java/DataStreamJob.java
rename to tools/mig-test/datastream/datastream-3.2.1/src/main/java/DataStreamJob.java
diff --git a/tools/mig-test/datastream/run_migration_test.rb b/tools/mig-test/datastream/run_migration_test.rb
index aa7408d6046..e0cd8a5890b 100644
--- a/tools/mig-test/datastream/run_migration_test.rb
+++ b/tools/mig-test/datastream/run_migration_test.rb
@@ -93,7 +93,7 @@ def test_migration(from_version, to_version)
   end
 end
 
-version_list = %w[2.4.2 3.0.0 3.0.1 3.1.0 3.1.1 3.3-SNAPSHOT]
+version_list = %w[3.2.0 3.2.1 3.3-SNAPSHOT]
 version_result = Hash.new('❓')
 @failures = []
 
diff --git a/tools/mig-test/prepare_libs.rb b/tools/mig-test/prepare_libs.rb
index 58b17cb6976..92175b7f936 100644
--- a/tools/mig-test/prepare_libs.rb
+++ b/tools/mig-test/prepare_libs.rb
@@ -21,47 +21,23 @@
 
 Dir.chdir(__dir__)
 
-RELEASED_VERSIONS = {
-  '3.0.0': {
-    tar: 'https://github.com/apache/flink-cdc/releases/download/release-3.0.0/flink-cdc-3.0.0-bin.tar.gz',
-    connectors: %w[
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-doris/3.0.0/flink-cdc-pipeline-connector-doris-3.0.0.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-mysql/3.0.0/flink-cdc-pipeline-connector-mysql-3.0.0.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-starrocks/3.0.0/flink-cdc-pipeline-connector-starrocks-3.0.0.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-values/3.0.0/flink-cdc-pipeline-connector-values-3.0.0.jar
-    ]
-  },
-  '3.0.1': {
-    tar: 'https://github.com/apache/flink-cdc/releases/download/release-3.0.1/flink-cdc-3.0.1-bin.tar.gz',
-    connectors: %w[
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-doris/3.0.1/flink-cdc-pipeline-connector-doris-3.0.1.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-mysql/3.0.1/flink-cdc-pipeline-connector-mysql-3.0.1.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-starrocks/3.0.1/flink-cdc-pipeline-connector-starrocks-3.0.1.jar
-      https://repo1.maven.org/maven2/com/ververica/flink-cdc-pipeline-connector-values/3.0.1/flink-cdc-pipeline-connector-values-3.0.1.jar
-    ]
-  },
-  '3.1.0': {
-    tar: 'https://dlcdn.apache.org/flink/flink-cdc-3.1.0/flink-cdc-3.1.0-bin.tar.gz',
-    connectors: %w[
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-mysql/3.1.0/flink-cdc-pipeline-connector-mysql-3.1.0.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-doris/3.1.0/flink-cdc-pipeline-connector-doris-3.1.0.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-starrocks/3.1.0/flink-cdc-pipeline-connector-starrocks-3.1.0.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-kafka/3.1.0/flink-cdc-pipeline-connector-kafka-3.1.0.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-paimon/3.1.0/flink-cdc-pipeline-connector-paimon-3.1.0.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-values/3.1.0/flink-cdc-pipeline-connector-values-3.1.0.jar
-    ]
-  },
-  '3.1.1': {
-    tar: 'https://dlcdn.apache.org/flink/flink-cdc-3.1.1/flink-cdc-3.1.1-bin.tar.gz',
-    connectors: %w[
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-mysql/3.1.1/flink-cdc-pipeline-connector-mysql-3.1.1.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-doris/3.1.1/flink-cdc-pipeline-connector-doris-3.1.1.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-starrocks/3.1.1/flink-cdc-pipeline-connector-starrocks-3.1.1.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-kafka/3.1.1/flink-cdc-pipeline-connector-kafka-3.1.1.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-paimon/3.1.1/flink-cdc-pipeline-connector-paimon-3.1.1.jar
-      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-values/3.1.1/flink-cdc-pipeline-connector-values-3.1.1.jar
+def gen_version(tag)
+  {
+    tar: "https://dlcdn.apache.org/flink/flink-cdc-#{tag}/flink-cdc-#{tag}-bin.tar.gz",
+    connectors: %W[
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-mysql/#{tag}/flink-cdc-pipeline-connector-mysql-#{tag}.jar
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-doris/#{tag}/flink-cdc-pipeline-connector-doris-#{tag}.jar
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-starrocks/#{tag}/flink-cdc-pipeline-connector-starrocks-#{tag}.jar
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-kafka/#{tag}/flink-cdc-pipeline-connector-kafka-#{tag}.jar
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-paimon/#{tag}/flink-cdc-pipeline-connector-paimon-#{tag}.jar
+      https://repo1.maven.org/maven2/org/apache/flink/flink-cdc-pipeline-connector-values/#{tag}/flink-cdc-pipeline-connector-values-#{tag}.jar
     ]
   }
+end
+
+RELEASED_VERSIONS = {
+  '3.2.0': gen_version('3.2.0'),
+  '3.2.1': gen_version('3.2.1'),
 }.freeze
 
 HEAD_VERSION = '3.3-SNAPSHOT'
diff --git a/tools/mig-test/run_migration_test.rb b/tools/mig-test/run_migration_test.rb
index 1a454efe8f7..11d64eb8748 100644
--- a/tools/mig-test/run_migration_test.rb
+++ b/tools/mig-test/run_migration_test.rb
@@ -115,54 +115,32 @@ def test_migration(from_version, to_version)
 end
 
 version_list = case ARGV[0]
-               when '1.18.1' then %w[3.0.0 3.0.1 3.1.1 3.3-SNAPSHOT]
-               when '1.19.1' then %w[3.1.1 3.3-SNAPSHOT]
-               when '1.20.0' then %w[3.3-SNAPSHOT]
+               when '1.18.1' then %w[3.2.0 3.2.1 3.3-SNAPSHOT]
+               when '1.19.1' then %w[3.2.0 3.2.1 3.3-SNAPSHOT]
+               when '1.20.0' then %w[3.2.1 3.3-SNAPSHOT]
                else []
                end
 
-no_savepoint_versions = %w[3.0.0 3.0.1]
 version_result = Hash.new('❓')
 @failures = []
 
+new_version = version_list.last
+
 version_list.each_with_index do |old_version, old_index|
+  puts "-> Testing migrating from #{old_version} to latest snapshot."
   puts 'Restarting cluster...'
   `#{FLINK_HOME}/bin/stop-cluster.sh`
   puts 'Stopped cluster.'
   `#{FLINK_HOME}/bin/start-cluster.sh`
   puts 'Started cluster.'
-  version_list.each_with_index do |new_version, new_index|
-    next if old_index > new_index
-    next if no_savepoint_versions.include? new_version
-
-    result = test_migration old_version, new_version
-    version_result[old_version + new_version] = result ? '✅' : '❌'
-    @failures << [old_version, new_version] unless result
-  end
-end
-
-printable_result = []
-printable_result << [''] + version_list
-version_list.each_with_index do |old_version, old_index|
-  table_line = [old_version]
-  version_list.each_with_index do |new_version, new_index|
-    table_line << if old_index > new_index
-                    ''
-                  else
-                    version_result[old_version + new_version]
-                  end
-  end
-  printable_result << table_line
-end
-begin
-  require 'terminal-table'
-  puts Terminal::Table.new rows: printable_result, title: 'Migration Test Result'
-rescue LoadError
-  puts 'Test summary: ', printable_result
+  result = test_migration old_version, new_version
+  version_result[old_version + new_version] = result ? '✅' : '❌'
+  @failures << [old_version, new_version] unless result
 end
 
-puts "✅ - Compatible, ❌ - Not compatible, ❓ - Target version doesn't support `--from-savepoint`"
 if @failures.any?
+  puts 'Some migration to snapshot version tests failed. Details: '
+  puts @failures
   abort 'Some migration to snapshot version tests failed.'
 end