diff --git a/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt index db71b08e8c75..41b9621eec19 100644 --- a/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt @@ -172,7 +172,7 @@ abstract class AzureBlobStorageStreamCopier( } @Throws(Exception::class) - override fun generateMergeStatement(destTableName: String?): String? { + override fun generateMergeStatement(destTableName: String?): String { LOGGER.info( "Preparing to merge tmp table {} to dest table: {}, schema: {}, in destination.", tmpTableName, diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt index 3cf2c234ce93..0094e8c169e1 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt @@ -34,7 +34,7 @@ interface Destination : Integration { fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? /** @@ -53,7 +53,7 @@ interface Destination : Integration { fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? { return ShimToSerializedAirbyteMessageConsumer( getConsumer(config, catalog, outputRecordCollector) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt index 6a04d04180da..b57a1047d427 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt @@ -46,12 +46,12 @@ class IntegrationRunner @VisibleForTesting internal constructor( cliParser: IntegrationCliParser, - outputRecordCollector: Consumer, + outputRecordCollector: Consumer, destination: Destination?, source: Source? ) { private val cliParser: IntegrationCliParser - private val outputRecordCollector: Consumer + private val outputRecordCollector: Consumer private val integration: Integration private val destination: Destination? private val source: Source? @@ -61,7 +61,7 @@ internal constructor( destination: Destination? ) : this( IntegrationCliParser(), - Consumer { message: AirbyteMessage? -> + Consumer { message: AirbyteMessage -> Destination.Companion.defaultOutputRecordCollector(message) }, destination, @@ -72,7 +72,7 @@ internal constructor( source: Source? ) : this( IntegrationCliParser(), - Consumer { message: AirbyteMessage? 
-> + Consumer { message: AirbyteMessage -> Destination.Companion.defaultOutputRecordCollector(message) }, null, @@ -99,7 +99,7 @@ internal constructor( @VisibleForTesting internal constructor( cliParser: IntegrationCliParser, - outputRecordCollector: Consumer, + outputRecordCollector: Consumer, destination: Destination?, source: Source?, jsonSchemaValidator: JsonSchemaValidator @@ -254,7 +254,7 @@ internal constructor( private fun produceMessages( messageIterator: AutoCloseableIterator, - recordCollector: Consumer + recordCollector: Consumer ) { messageIterator!!.airbyteStream.ifPresent { s: AirbyteStreamNameNamespacePair? -> LOGGER.debug("Producing messages for stream {}...", s) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt index 6bf42876047b..916da5f6e5a6 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt @@ -31,7 +31,7 @@ abstract class SpecModifyingDestination(private val destination: Destination) : override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? { return destination.getConsumer(config, catalog, outputRecordCollector) } @@ -40,7 +40,7 @@ abstract class SpecModifyingDestination(private val destination: Destination) : override fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? { return destination.getSerializedMessageConsumer(config, catalog, outputRecordCollector) } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt index 999eba99baf8..aaed6e6fb1d1 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt @@ -94,7 +94,7 @@ class SshWrappedDestination : Destination { override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? { val tunnel = getTunnelInstance(config) @@ -120,7 +120,7 @@ class SshWrappedDestination : Destination { override fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? 
{ val clone = Jsons.clone(config) val connectionOptionsConfig: Optional = diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt index 49ed8224f5b1..65dc82bd7d14 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt @@ -38,7 +38,7 @@ interface DestinationFlushFunction { @Throws(Exception::class) fun flush( decs: StreamDescriptor, - stream: Stream, + stream: Stream, ) /** diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt index 75d8fffea82f..579eb0506242 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt @@ -60,7 +60,7 @@ import org.slf4j.LoggerFactory class BufferedStreamConsumer @VisibleForTesting internal constructor( - private val outputRecordCollector: Consumer?, + private val outputRecordCollector: Consumer, private val onStart: OnStartFunction, private val bufferingStrategy: BufferingStrategy, private val onClose: OnCloseFunction, @@ -87,7 +87,7 @@ internal constructor( */ @Deprecated("") constructor( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, onStart: OnStartFunction, bufferingStrategy: BufferingStrategy, onClose: OnCloseFunction, @@ -109,7 +109,7 @@ internal constructor( ) constructor( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, onStart: OnStartFunction, bufferingStrategy: BufferingStrategy, onClose: OnCloseFunction, diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt index d05bbfbdbd52..b0bf8a570082 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt @@ -6,7 +6,8 @@ package io.airbyte.cdk.integrations.destination.buffered_stream_consumer import io.airbyte.commons.functional.CheckedBiConsumer import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair -interface RecordWriter : CheckedBiConsumer, Exception> { +fun interface RecordWriter : + CheckedBiConsumer, Exception> { @Throws(Exception::class) override fun accept(stream: AirbyteStreamNameNamespacePair, records: List) } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt index 8ce7912fd8cc..da28224adf18 
100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt @@ -52,7 +52,7 @@ interface SqlOperations { * @throws Exception exception */ @Throws(Exception::class) - fun createTableIfNotExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) + fun createTableIfNotExists(database: JdbcDatabase, schemaName: String?, tableName: String?) /** * Query to create a table with provided name in provided schema if it does not already exist. @@ -72,7 +72,7 @@ interface SqlOperations { * @throws Exception exception */ @Throws(Exception::class) - fun dropTableIfExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) + fun dropTableIfExists(database: JdbcDatabase, schemaName: String?, tableName: String?) /** * Query to remove all records from a table. Assumes the table exists. @@ -82,11 +82,7 @@ interface SqlOperations { * @param tableName Name of table * @return Query */ - fun truncateTableQuery( - database: JdbcDatabase?, - schemaName: String?, - tableName: String? - ): String? + fun truncateTableQuery(database: JdbcDatabase?, schemaName: String?, tableName: String?): String /** * Insert records into table. Assumes the table exists. @@ -99,8 +95,8 @@ interface SqlOperations { */ @Throws(Exception::class) fun insertRecords( - database: JdbcDatabase?, - records: List?, + database: JdbcDatabase, + records: List, schemaName: String?, tableName: String? ) @@ -131,8 +127,7 @@ interface SqlOperations { * @param queries Queries to execute * @throws Exception exception */ - @Throws(Exception::class) - fun executeTransaction(database: JdbcDatabase?, queries: List?) + @Throws(Exception::class) fun executeTransaction(database: JdbcDatabase, queries: List) /** Check if the data record is valid and ok to be written to destination */ fun isValidData(data: JsonNode?): Boolean diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt index 76ae3262702f..bb8530eb46b7 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt @@ -48,7 +48,7 @@ interface StreamCopier { @Throws(Exception::class) fun createDestinationTable(): String? /** Generates a merge SQL statement from the temporary table to the final table. */ - @Throws(Exception::class) fun generateMergeStatement(destTableName: String?): String? 
+ @Throws(Exception::class) fun generateMergeStatement(destTableName: String?): String /** * Cleans up the copier by removing the staging file and dropping the temporary table after diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt index 80570af6b760..635187a732b6 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt @@ -21,12 +21,12 @@ import org.slf4j.LoggerFactory * This should be deprecated as we slowly move towards using [SerializedBufferingStrategy] instead. */ class InMemoryRecordBufferingStrategy( - private val recordWriter: RecordWriter, + private val recordWriter: RecordWriter, private val checkAndRemoveRecordWriter: CheckAndRemoveRecordWriter?, private val maxQueueSizeInBytes: Long ) : BufferingStrategy { private var streamBuffer: - MutableMap> = + MutableMap> = HashMap() private var fileName: String? = null @@ -34,7 +34,7 @@ class InMemoryRecordBufferingStrategy( private var bufferSizeInBytes: Long = 0 constructor( - recordWriter: RecordWriter, + recordWriter: RecordWriter, maxQueueSizeInBytes: Long ) : this(recordWriter, null, maxQueueSizeInBytes) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index c7be3358f550..f2aa5c1c94bf 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.28.11 +version=0.28.12 diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt index bbb453b4fff5..09b011583d90 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt @@ -39,7 +39,7 @@ import org.slf4j.LoggerFactory internal class IntegrationRunnerTest { private lateinit var cliParser: IntegrationCliParser - private lateinit var stdoutConsumer: Consumer + private lateinit var stdoutConsumer: Consumer private lateinit var destination: Destination private lateinit var source: Source private lateinit var configPath: Path diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt index a515bdd90f12..3a1d1fe31847 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt @@ -553,7 +553,7 @@ class AsyncStreamConsumerTest { namespace: String, allRecords: List, ) { - val argumentCaptor = org.mockito.kotlin.argumentCaptor>() + val argumentCaptor = 
org.mockito.kotlin.argumentCaptor>() Mockito.verify(flushFunction, Mockito.atLeast(1)) .flush( org.mockito.kotlin.eq( diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt index c53d9deb3511..46ec065ebcad 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt @@ -31,10 +31,10 @@ import org.mockito.kotlin.mock class BufferedStreamConsumerTest { private lateinit var consumer: BufferedStreamConsumer private lateinit var onStart: OnStartFunction - private lateinit var recordWriter: RecordWriter + private lateinit var recordWriter: RecordWriter private lateinit var onClose: OnCloseFunction private lateinit var isValidRecord: CheckedFunction - private lateinit var outputRecordCollector: Consumer + private lateinit var outputRecordCollector: Consumer @BeforeEach @Throws(Exception::class) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt index 9474afe20d7a..90584e05b129 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt @@ -16,7 +16,7 @@ import org.mockito.Mockito import org.mockito.kotlin.mock class InMemoryRecordBufferingStrategyTest { - private val recordWriter: RecordWriter = mock() + private val recordWriter: RecordWriter = mock() @Test @Throws(Exception::class) diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle b/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle index 3fc95410b3b9..a42598118edf 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle @@ -8,6 +8,10 @@ java { } } +compileKotlin.compilerOptions.allWarningsAsErrors = false +compileTestFixturesKotlin.compilerOptions.allWarningsAsErrors = false +compileTestKotlin.compilerOptions.allWarningsAsErrors = false + dependencies { api 'org.apache.commons:commons-csv:1.10.0' @@ -27,4 +31,6 @@ dependencies { testFixturesImplementation testFixtures(project(':airbyte-cdk:java:airbyte-cdk:typing-deduping')) testImplementation project(':airbyte-cdk:java:airbyte-cdk:typing-deduping') + testImplementation 'org.mockito.kotlin:mockito-kotlin:5.2.1' + } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java deleted file mode 100644 index ad2999d53c47..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java +++ /dev/null @@ -1,362 +0,0 @@ 
-/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE; -import static io.airbyte.cdk.integrations.base.errors.messages.ErrorMessage.getErrorMessage; -import static io.airbyte.cdk.integrations.util.ConfiguredCatalogUtilKt.addDefaultNamespaceToStreams; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.annotations.VisibleForTesting; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.JdbcConnector; -import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; -import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility; -import io.airbyte.cdk.integrations.base.Destination; -import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer; -import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer; -import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteRecordMessage; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcV1V2Migrator; -import io.airbyte.commons.exceptions.ConnectionErrorException; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.map.MoreMaps; -import io.airbyte.integrations.base.destination.typing_deduping.CatalogParser; -import io.airbyte.integrations.base.destination.typing_deduping.DefaultTyperDeduper; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler; -import io.airbyte.integrations.base.destination.typing_deduping.NoOpTyperDeduperWithV1V2Migrations; -import io.airbyte.integrations.base.destination.typing_deduping.NoopTyperDeduper; -import io.airbyte.integrations.base.destination.typing_deduping.NoopV2TableMigrator; -import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog; -import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator; -import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; -import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration; -import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState; -import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; -import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import java.sql.SQLException; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.UUID; -import java.util.function.Consumer; -import javax.sql.DataSource; -import org.apache.commons.lang3.NotImplementedException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class AbstractJdbcDestination - extends 
JdbcConnector implements Destination { - - private static final Logger LOGGER = LoggerFactory.getLogger(AbstractJdbcDestination.class); - - public static final String RAW_SCHEMA_OVERRIDE = "raw_data_schema"; - - public static final String DISABLE_TYPE_DEDUPE = "disable_type_dedupe"; - - private final NamingConventionTransformer namingResolver; - private final SqlOperations sqlOperations; - - protected NamingConventionTransformer getNamingResolver() { - return namingResolver; - } - - protected SqlOperations getSqlOperations() { - return sqlOperations; - } - - protected String getConfigSchemaKey() { - return "schema"; - } - - public AbstractJdbcDestination(final String driverClass, - final NamingConventionTransformer namingResolver, - final SqlOperations sqlOperations) { - super(driverClass); - this.namingResolver = namingResolver; - this.sqlOperations = sqlOperations; - } - - @Override - public AirbyteConnectionStatus check(final JsonNode config) { - final DataSource dataSource = getDataSource(config); - - try { - final JdbcDatabase database = getDatabase(dataSource); - final String outputSchema = namingResolver.getIdentifier(config.get(JdbcUtils.SCHEMA_KEY).asText()); - attemptTableOperations(outputSchema, database, namingResolver, sqlOperations, false); - if (TypingAndDedupingFlag.isDestinationV2()) { - final var v2RawSchema = namingResolver.getIdentifier(TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE) - .orElse(DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)); - attemptTableOperations(v2RawSchema, database, namingResolver, sqlOperations, false); - destinationSpecificTableOperations(database); - } - return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); - } catch (final ConnectionErrorException ex) { - final String message = getErrorMessage(ex.getStateCode(), ex.getErrorCode(), ex.getExceptionMessage(), ex); - AirbyteTraceMessageUtility.emitConfigErrorTrace(ex, message); - return new AirbyteConnectionStatus() - .withStatus(Status.FAILED) - .withMessage(message); - } catch (final Exception e) { - LOGGER.error("Exception while checking connection: ", e); - return new AirbyteConnectionStatus() - .withStatus(Status.FAILED) - .withMessage("Could not connect with provided configuration. \n" + e.getMessage()); - } finally { - try { - DataSourceFactory.close(dataSource); - } catch (final Exception e) { - LOGGER.warn("Unable to close data source.", e); - } - } - } - - /** - * Specific Databases may have additional checks unique to them which they need to perform, override - * this method to add additional checks. - * - * @param database the database to run checks against - * @throws Exception - */ - protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception {} - - /** - * This method is deprecated. It verifies table creation, but not insert right to a newly created - * table. Use attemptTableOperations with the attemptInsert argument instead. - */ - @Deprecated - public static void attemptSQLCreateAndDropTableOperations(final String outputSchema, - final JdbcDatabase database, - final NamingConventionTransformer namingResolver, - final SqlOperations sqlOps) - throws Exception { - attemptTableOperations(outputSchema, database, namingResolver, sqlOps, false); - } - - /** - * Verifies if provided creds has enough permissions. Steps are: 1. Create schema if not exists. 2. - * Create test table. 3. Insert dummy record to newly created table if "attemptInsert" set to true. - * 4. Delete table created on step 2. 
- * - * @param outputSchema - schema to tests against. - * @param database - database to tests against. - * @param namingResolver - naming resolver. - * @param sqlOps - SqlOperations object - * @param attemptInsert - set true if need to make attempt to insert dummy records to newly created - * table. Set false to skip insert step. - */ - public static void attemptTableOperations(final String outputSchema, - final JdbcDatabase database, - final NamingConventionTransformer namingResolver, - final SqlOperations sqlOps, - final boolean attemptInsert) - throws Exception { - // verify we have write permissions on the target schema by creating a table with a random name, - // then dropping that table - try { - // Get metadata from the database to see whether connection is possible - database.bufferedResultSetQuery(conn -> conn.getMetaData().getCatalogs(), JdbcUtils.defaultSourceOperations::rowToJson); - - // verify we have write permissions on the target schema by creating a table with a random name, - // then dropping that table - final String outputTableName = namingResolver.getIdentifier("_airbyte_connection_test_" + UUID.randomUUID().toString().replaceAll("-", "")); - sqlOps.createSchemaIfNotExists(database, outputSchema); - sqlOps.createTableIfNotExists(database, outputSchema, outputTableName); - // verify if user has permission to make SQL INSERT queries - try { - if (attemptInsert) { - sqlOps.insertRecords(database, List.of(getDummyRecord()), outputSchema, outputTableName); - } - } finally { - sqlOps.dropTableIfExists(database, outputSchema, outputTableName); - } - } catch (final SQLException e) { - if (Objects.isNull(e.getCause()) || !(e.getCause() instanceof SQLException)) { - throw new ConnectionErrorException(e.getSQLState(), e.getErrorCode(), e.getMessage(), e); - } else { - final SQLException cause = (SQLException) e.getCause(); - throw new ConnectionErrorException(e.getSQLState(), cause.getErrorCode(), cause.getMessage(), e); - } - } catch (final Exception e) { - throw new Exception(e); - } - } - - /** - * Generates a dummy AirbyteRecordMessage with random values. - * - * @return AirbyteRecordMessage object with dummy values that may be used to test insert permission. - */ - private static PartialAirbyteMessage getDummyRecord() { - final JsonNode dummyDataToInsert = Jsons.deserialize("{ \"field1\": true }"); - return new PartialAirbyteMessage() - .withRecord(new PartialAirbyteRecordMessage() - .withStream("stream1") - .withEmittedAt(1602637589000L)) - .withSerialized(dummyDataToInsert.toString()); - } - - /** - * Subclasses which need to modify the DataSource should override - * {@link #modifyDataSourceBuilder(DataSourceFactory.DataSourceBuilder)} rather than this method. - */ - @VisibleForTesting - public DataSource getDataSource(final JsonNode config) { - final JsonNode jdbcConfig = toJdbcConfig(config); - final Map connectionProperties = getConnectionProperties(config); - final DataSourceFactory.DataSourceBuilder builder = new DataSourceFactory.DataSourceBuilder( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? 
jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - driverClassName, - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText()) - .withConnectionProperties(connectionProperties) - .withConnectionTimeout(getConnectionTimeout(connectionProperties)); - return modifyDataSourceBuilder(builder).build(); - } - - protected DataSourceFactory.DataSourceBuilder modifyDataSourceBuilder(final DataSourceFactory.DataSourceBuilder builder) { - return builder; - } - - @VisibleForTesting - public JdbcDatabase getDatabase(final DataSource dataSource) { - return new DefaultJdbcDatabase(dataSource); - } - - protected Map getConnectionProperties(final JsonNode config) { - final Map customProperties = JdbcUtils.parseJdbcParameters(config, JdbcUtils.JDBC_URL_PARAMS_KEY); - final Map defaultProperties = getDefaultConnectionProperties(config); - assertCustomParametersDontOverwriteDefaultParameters(customProperties, defaultProperties); - return MoreMaps.merge(customProperties, defaultProperties); - } - - private void assertCustomParametersDontOverwriteDefaultParameters(final Map customParameters, - final Map defaultParameters) { - for (final String key : defaultParameters.keySet()) { - if (customParameters.containsKey(key) && !Objects.equals(customParameters.get(key), defaultParameters.get(key))) { - throw new IllegalArgumentException("Cannot overwrite default JDBC parameter " + key); - } - } - } - - protected abstract Map getDefaultConnectionProperties(final JsonNode config); - - public abstract JsonNode toJdbcConfig(JsonNode config); - - protected abstract JdbcSqlGenerator getSqlGenerator(); - - protected abstract JdbcDestinationHandler getDestinationHandler(final String databaseName, - final JdbcDatabase database, - final String rawTableSchema); - - /** - * Provide any migrations that the destination needs to run. Most destinations will need to provide - * an instande of - * {@link io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcV1V2Migrator} at minimum. - */ - protected abstract List> getMigrations( - final JdbcDatabase database, - final String databaseName, - final SqlGenerator sqlGenerator, - final DestinationHandler destinationHandler); - - /** - * "database" key at root of the config json, for any other variants in config, override this - * method. - * - * @param config - * @return - */ - protected String getDatabaseName(final JsonNode config) { - return config.get(JdbcUtils.DATABASE_KEY).asText(); - } - - protected StreamAwareDataTransformer getDataTransformer(final ParsedCatalog parsedCatalog, - final String defaultNamespace) { - return new IdentityDataTransformer(); - } - - @Override - public AirbyteMessageConsumer getConsumer(final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final Consumer outputRecordCollector) { - throw new NotImplementedException("Should use the getSerializedMessageConsumer instead"); - } - - @Override - public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final Consumer outputRecordCollector) - throws Exception { - final JdbcDatabase database = getDatabase(getDataSource(config)); - // Short circuit for non-v2 destinations. 
- if (!TypingAndDedupingFlag.isDestinationV2()) { - return JdbcBufferedConsumerFactory.createAsync( - outputRecordCollector, - database, - sqlOperations, - namingResolver, - config, - catalog, - null, - new NoopTyperDeduper()); - } - - final String defaultNamespace = config.get(getConfigSchemaKey()).asText(); - addDefaultNamespaceToStreams(catalog, defaultNamespace); - return getV2MessageConsumer(config, catalog, outputRecordCollector, database, defaultNamespace); - } - - private SerializedAirbyteMessageConsumer getV2MessageConsumer(final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final Consumer outputRecordCollector, - final JdbcDatabase database, - final String defaultNamespace) { - final JdbcSqlGenerator sqlGenerator = getSqlGenerator(); - Optional rawNamespaceOverride = TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE); - final ParsedCatalog parsedCatalog = rawNamespaceOverride - .map(override -> new CatalogParser(sqlGenerator, override)) - .orElse(new CatalogParser(sqlGenerator)) - .parseCatalog(catalog); - final String databaseName = getDatabaseName(config); - final var migrator = new JdbcV1V2Migrator(namingResolver, database, databaseName); - final NoopV2TableMigrator v2TableMigrator = new NoopV2TableMigrator(); - final DestinationHandler destinationHandler = - getDestinationHandler(databaseName, database, rawNamespaceOverride.orElse(DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)); - final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); - final TyperDeduper typerDeduper; - List> migrations = getMigrations(database, databaseName, sqlGenerator, destinationHandler); - if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, destinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); - } else { - typerDeduper = - new DefaultTyperDeduper<>(sqlGenerator, destinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); - } - - return JdbcBufferedConsumerFactory.createAsync( - outputRecordCollector, - database, - sqlOperations, - namingResolver, - config, - catalog, - defaultNamespace, - typerDeduper, - getDataTransformer(parsedCatalog, defaultNamespace)); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.java deleted file mode 100644 index fe41101366c2..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.java +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -/** - * Jdbc destination column definition representation - * - * @param name - * @param type - * @param columnSize - */ -public record ColumnDefinition(String name, String type, int columnSize, boolean isNullable) { - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java deleted file mode 100644 index 4e945e6f023e..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE; -import static io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination.RAW_SCHEMA_OVERRIDE; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.base.Preconditions; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer; -import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.StreamSyncSummary; -import io.airbyte.cdk.integrations.destination.async.AsyncStreamConsumer; -import io.airbyte.cdk.integrations.destination.async.buffers.BufferManager; -import io.airbyte.cdk.integrations.destination.async.deser.DeserializationUtil; -import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer; -import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import io.airbyte.cdk.integrations.destination.async.state.FlushFailure; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnCloseFunction; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnStartFunction; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.RecordWriter; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.AirbyteRecordMessage; -import io.airbyte.protocol.models.v0.AirbyteStream; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import io.airbyte.protocol.models.v0.StreamDescriptor; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.Executors; -import java.util.function.Consumer; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; - -/** - * Strategy: - *

- * 1. Create a final table for each stream - *

- * 2. Accumulate records in a buffer. One buffer per stream - *

- * 3. As records accumulate write them in batch to the database. We set a minimum numbers of records - * before writing to avoid wasteful record-wise writes. In the case with slow syncs this will be - * superseded with a periodic record flush from {@link BufferedStreamConsumer#periodicBufferFlush()} - *

- * 4. Once all records have been written to buffer, flush the buffer and write any remaining records - * to the database (regardless of how few are left) - */ -public class JdbcBufferedConsumerFactory { - - private static final Logger LOGGER = LoggerFactory.getLogger(JdbcBufferedConsumerFactory.class); - - public static SerializedAirbyteMessageConsumer createAsync(final Consumer outputRecordCollector, - final JdbcDatabase database, - final SqlOperations sqlOperations, - final NamingConventionTransformer namingResolver, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final String defaultNamespace, - final TyperDeduper typerDeduper, - final StreamAwareDataTransformer dataTransformer) { - final List writeConfigs = createWriteConfigs(namingResolver, config, catalog, sqlOperations.isSchemaRequired()); - return new AsyncStreamConsumer( - outputRecordCollector, - onStartFunction(database, sqlOperations, writeConfigs, typerDeduper), - onCloseFunction(typerDeduper), - new JdbcInsertFlushFunction(recordWriterFunction(database, sqlOperations, writeConfigs, catalog)), - catalog, - new BufferManager((long) (Runtime.getRuntime().maxMemory() * 0.2)), - new FlushFailure(), - Optional.ofNullable(defaultNamespace), - Executors.newFixedThreadPool(2), - dataTransformer, - new DeserializationUtil()); - } - - public static SerializedAirbyteMessageConsumer createAsync(final Consumer outputRecordCollector, - final JdbcDatabase database, - final SqlOperations sqlOperations, - final NamingConventionTransformer namingResolver, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final String defaultNamespace, - final TyperDeduper typerDeduper) { - return createAsync(outputRecordCollector, database, sqlOperations, namingResolver, config, catalog, defaultNamespace, typerDeduper, - new IdentityDataTransformer()); - } - - private static List createWriteConfigs(final NamingConventionTransformer namingResolver, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final boolean schemaRequired) { - if (schemaRequired) { - Preconditions.checkState(config.has("schema"), "jdbc destinations must specify a schema."); - } - return catalog.getStreams().stream().map(toWriteConfig(namingResolver, config, schemaRequired)).collect(Collectors.toList()); - } - - private static Function toWriteConfig( - final NamingConventionTransformer namingResolver, - final JsonNode config, - final boolean schemaRequired) { - return stream -> { - Preconditions.checkNotNull(stream.getDestinationSyncMode(), "Undefined destination sync mode"); - final AirbyteStream abStream = stream.getStream(); - - final String defaultSchemaName = schemaRequired ? namingResolver.getIdentifier(config.get("schema").asText()) - : namingResolver.getIdentifier(config.get(JdbcUtils.DATABASE_KEY).asText()); - // Method checks for v2 - final String outputSchema = getOutputSchema(abStream, defaultSchemaName, namingResolver); - final String streamName = abStream.getName(); - final String tableName; - final String tmpTableName; - // TODO: Should this be injected from outside ? 
- if (TypingAndDedupingFlag.isDestinationV2()) { - final var finalSchema = Optional.ofNullable(abStream.getNamespace()).orElse(defaultSchemaName); - final var rawName = StreamId.concatenateRawTableName(finalSchema, streamName); - tableName = namingResolver.convertStreamName(rawName); - tmpTableName = namingResolver.getTmpTableName(rawName); - } else { - tableName = namingResolver.getRawTableName(streamName); - tmpTableName = namingResolver.getTmpTableName(streamName); - } - final DestinationSyncMode syncMode = stream.getDestinationSyncMode(); - - final WriteConfig writeConfig = new WriteConfig(streamName, abStream.getNamespace(), outputSchema, tmpTableName, tableName, syncMode); - LOGGER.info("Write config: {}", writeConfig); - - return writeConfig; - }; - } - - /** - * Defer to the {@link AirbyteStream}'s namespace. If this is not set, use the destination's default - * schema. This namespace is source-provided, and can be potentially empty. - *

- * The logic here matches the logic in the catalog_process.py for Normalization. Any modifications - * need to be reflected there and vice versa. - */ - private static String getOutputSchema(final AirbyteStream stream, - final String defaultDestSchema, - final NamingConventionTransformer namingResolver) { - if (TypingAndDedupingFlag.isDestinationV2()) { - return namingResolver - .getNamespace(TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE).orElse(DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)); - } else { - return namingResolver.getNamespace(Optional.ofNullable(stream.getNamespace()).orElse(defaultDestSchema)); - } - } - - /** - * Sets up destination storage through: - *

- * 1. Creates Schema (if not exists) - *

- * 2. Creates airybte_raw table (if not exists) - *

- * 3. Truncates table if sync mode is in OVERWRITE - * - * @param database JDBC database to connect to - * @param sqlOperations interface for execution SQL queries - * @param writeConfigs settings for each stream - */ - private static OnStartFunction onStartFunction(final JdbcDatabase database, - final SqlOperations sqlOperations, - final Collection writeConfigs, - final TyperDeduper typerDeduper) { - return () -> { - typerDeduper.prepareSchemasAndRunMigrations(); - LOGGER.info("Preparing raw tables in destination started for {} streams", writeConfigs.size()); - final List queryList = new ArrayList<>(); - for (final WriteConfig writeConfig : writeConfigs) { - final String schemaName = writeConfig.getOutputSchemaName(); - final String dstTableName = writeConfig.getOutputTableName(); - LOGGER.info("Preparing raw table in destination started for stream {}. schema: {}, table name: {}", - writeConfig.getStreamName(), - schemaName, - dstTableName); - sqlOperations.createSchemaIfNotExists(database, schemaName); - sqlOperations.createTableIfNotExists(database, schemaName, dstTableName); - switch (writeConfig.getSyncMode()) { - case OVERWRITE -> queryList.add(sqlOperations.truncateTableQuery(database, schemaName, dstTableName)); - case APPEND, APPEND_DEDUP -> {} - default -> throw new IllegalStateException("Unrecognized sync mode: " + writeConfig.getSyncMode()); - } - } - sqlOperations.executeTransaction(database, queryList); - LOGGER.info("Preparing raw tables in destination completed."); - typerDeduper.prepareFinalTables(); - }; - } - - /** - * Writes {@link AirbyteRecordMessage} to JDBC database's airbyte_raw table - * - * @param database JDBC database to connect to - * @param sqlOperations interface of SQL queries to execute - * @param writeConfigs settings for each stream - * @param catalog catalog of all streams to sync - */ - private static RecordWriter recordWriterFunction(final JdbcDatabase database, - final SqlOperations sqlOperations, - final List writeConfigs, - final ConfiguredAirbyteCatalog catalog) { - final Map pairToWriteConfig = writeConfigs.stream() - .collect(Collectors.toUnmodifiableMap(JdbcBufferedConsumerFactory::toNameNamespacePair, Function.identity())); - - return (pair, records) -> { - if (!pairToWriteConfig.containsKey(pair)) { - throw new IllegalArgumentException( - String.format("Message contained record from a stream that was not in the catalog. 
\ncatalog: %s", Jsons.serialize(catalog))); - } - - final WriteConfig writeConfig = pairToWriteConfig.get(pair); - sqlOperations.insertRecords(database, new ArrayList<>(records), writeConfig.getOutputSchemaName(), writeConfig.getOutputTableName()); - }; - } - - /** - * Tear down functionality - */ - @SuppressWarnings("unchecked") - private static OnCloseFunction onCloseFunction(final TyperDeduper typerDeduper) { - return (hasFailed, streamSyncSummaries) -> { - try { - typerDeduper.typeAndDedupe((Map) streamSyncSummaries); - typerDeduper.commitFinalTables(); - typerDeduper.cleanup(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - }; - } - - private static AirbyteStreamNameNamespacePair toNameNamespacePair(final WriteConfig config) { - return new AirbyteStreamNameNamespacePair(config.getStreamName(), config.getNamespace()); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.java deleted file mode 100644 index afc5d38818f6..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import io.airbyte.cdk.integrations.destination.async.function.DestinationFlushFunction; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.RecordWriter; -import io.airbyte.cdk.integrations.destination.jdbc.constants.GlobalDataSizeConstants; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.v0.StreamDescriptor; -import java.util.stream.Stream; - -public class JdbcInsertFlushFunction implements DestinationFlushFunction { - - private final RecordWriter recordWriter; - - public JdbcInsertFlushFunction(final RecordWriter recordWriter) { - this.recordWriter = recordWriter; - } - - @Override - public void flush(final StreamDescriptor desc, final Stream stream) throws Exception { - recordWriter.accept( - new AirbyteStreamNameNamespacePair(desc.getName(), desc.getNamespace()), - stream.toList()); - } - - @Override - public long getOptimalBatchSizeBytes() { - // TODO tune this value - currently SqlOperationUtils partitions 10K records per insert statement, - // but we'd like to stop doing that and instead control sql insert statement size via batch size. - return GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java deleted file mode 100644 index a411abe8e396..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.JavaBaseConstants; -import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.json.Jsons; -import java.io.File; -import java.io.PrintWriter; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.time.Instant; -import java.util.HashSet; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; - -public abstract class JdbcSqlOperations implements SqlOperations { - - protected static final String SHOW_SCHEMAS = "show schemas;"; - protected static final String NAME = "name"; - protected final Set schemaSet = new HashSet<>(); - - protected JdbcSqlOperations() {} - - @Override - public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { - try { - if (!schemaSet.contains(schemaName) && !isSchemaExists(database, schemaName)) { - database.execute(String.format("CREATE SCHEMA IF NOT EXISTS %s;", schemaName)); - schemaSet.add(schemaName); - } - } catch (final Exception e) { - throw checkForKnownConfigExceptions(e).orElseThrow(() -> e); - } - } - - /** - * When an exception occurs, we may recognize it as an issue with the users permissions or other - * configuration options. In these cases, we can wrap the exception in a - * {@link ConfigErrorException} which will exclude the error from our on-call paging/reporting - * - * @param e the exception to check. - * @return A ConfigErrorException with a message with actionable feedback to the user. - */ - protected Optional checkForKnownConfigExceptions(final Exception e) { - return Optional.empty(); - } - - @Override - public void createTableIfNotExists(final JdbcDatabase database, final String schemaName, final String tableName) throws SQLException { - try { - database.execute(createTableQuery(database, schemaName, tableName)); - for (final String postCreateSql : postCreateTableQueries(schemaName, tableName)) { - database.execute(postCreateSql); - } - } catch (final SQLException e) { - throw checkForKnownConfigExceptions(e).orElseThrow(() -> e); - } - } - - @Override - public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { - if (TypingAndDedupingFlag.isDestinationV2()) { - return createTableQueryV2(schemaName, tableName); - } else { - return createTableQueryV1(schemaName, tableName); - } - } - - /** - * Some subclasses may want to execute additional SQL statements after creating the raw table. For - * example, Postgres does not support index definitions within a CREATE TABLE statement, so we need - * to run CREATE INDEX statements after creating the table. 
- */ - protected List postCreateTableQueries(final String schemaName, final String tableName) { - return List.of(); - } - - protected String createTableQueryV1(final String schemaName, final String tableName) { - return String.format( - """ - CREATE TABLE IF NOT EXISTS %s.%s ( - %s VARCHAR PRIMARY KEY, - %s JSONB, - %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP - ); - """, - schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); - } - - protected String createTableQueryV2(final String schemaName, final String tableName) { - // Note that Meta is the last column in order, there was a time when tables didn't have meta, - // we issued Alter to add that column so it should be the last column. - return String.format( - """ - CREATE TABLE IF NOT EXISTS %s.%s ( - %s VARCHAR PRIMARY KEY, - %s JSONB, - %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, - %s TIMESTAMP WITH TIME ZONE DEFAULT NULL, - %s JSONB - ); - """, - schemaName, - tableName, - JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, - JavaBaseConstants.COLUMN_NAME_DATA, - JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, - JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT, - JavaBaseConstants.COLUMN_NAME_AB_META); - } - - // TODO: This method seems to be used by Postgres and others while staging to local temp files. - // Should there be a Local staging operations equivalent - protected void writeBatchToFile(final File tmpFile, final List records) throws Exception { - try (final PrintWriter writer = new PrintWriter(tmpFile, StandardCharsets.UTF_8); - final CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT)) { - for (final PartialAirbyteMessage record : records) { - final var uuid = UUID.randomUUID().toString(); - final var jsonData = record.getSerialized(); - final var airbyteMeta = Jsons.serialize(record.getRecord().getMeta()); - final var extractedAt = Timestamp.from(Instant.ofEpochMilli(record.getRecord().getEmittedAt())); - if (TypingAndDedupingFlag.isDestinationV2()) { - csvPrinter.printRecord(uuid, jsonData, extractedAt, null, airbyteMeta); - } else { - csvPrinter.printRecord(uuid, jsonData, extractedAt); - } - } - } - } - - @Override - public String truncateTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { - return String.format("TRUNCATE TABLE %s.%s;\n", schemaName, tableName); - } - - @Override - public String insertTableQuery(final JdbcDatabase database, final String schemaName, final String srcTableName, final String dstTableName) { - return String.format("INSERT INTO %s.%s SELECT * FROM %s.%s;\n", schemaName, dstTableName, schemaName, srcTableName); - } - - @Override - public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { - final StringBuilder appendedQueries = new StringBuilder(); - appendedQueries.append("BEGIN;\n"); - for (final String query : queries) { - appendedQueries.append(query); - } - appendedQueries.append("COMMIT;"); - database.execute(appendedQueries.toString()); - } - - @Override - public void dropTableIfExists(final JdbcDatabase database, final String schemaName, final String tableName) throws SQLException { - try { - database.execute(dropTableIfExistsQuery(schemaName, tableName)); - } catch (final SQLException e) { - throw checkForKnownConfigExceptions(e).orElseThrow(() -> e); - } - } - - public String dropTableIfExistsQuery(final String schemaName, final String tableName) { - return String.format("DROP TABLE IF EXISTS %s.%s;\n", 
schemaName, tableName); - } - - @Override - public boolean isSchemaRequired() { - return true; - } - - @Override - public boolean isValidData(final JsonNode data) { - return true; - } - - @Override - public final void insertRecords(final JdbcDatabase database, - final List records, - final String schemaName, - final String tableName) - throws Exception { - if (TypingAndDedupingFlag.isDestinationV2()) { - insertRecordsInternalV2(database, records, schemaName, tableName); - } else { - insertRecordsInternal(database, records, schemaName, tableName); - } - } - - protected abstract void insertRecordsInternal(JdbcDatabase database, - List records, - String schemaName, - String tableName) - throws Exception; - - protected abstract void insertRecordsInternalV2(JdbcDatabase database, - List records, - String schemaName, - String tableName) - throws Exception; - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.java deleted file mode 100644 index 7e53b7c92d5c..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Iterables; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.time.Instant; -import java.util.List; -import java.util.UUID; -import java.util.function.Supplier; - -public class SqlOperationsUtils { - - /** - * Inserts "raw" records in a single query. The purpose of helper to abstract away database-specific - * SQL syntax from this query. - * - * @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id, - * data, emitted_at) - * @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb ?) - * @param jdbcDatabase jdbc database - * @param records records to write - * @throws SQLException exception - */ - public static void insertRawRecordsInSingleQuery(final String insertQueryComponent, - final String recordQueryComponent, - final JdbcDatabase jdbcDatabase, - final List records) - throws SQLException { - insertRawRecordsInSingleQuery(insertQueryComponent, recordQueryComponent, jdbcDatabase, records, UUID::randomUUID, true); - } - - /** - * Inserts "raw" records in a single query. The purpose of helper to abstract away database-specific - * SQL syntax from this query. - * - * This version does not add a semicolon at the end of the INSERT statement. - * - * @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id, - * data, emitted_at) - * @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb ?) 
- * @param jdbcDatabase jdbc database - * @param records records to write - * @throws SQLException exception - */ - public static void insertRawRecordsInSingleQueryNoSem(final String insertQueryComponent, - final String recordQueryComponent, - final JdbcDatabase jdbcDatabase, - final List records) - throws SQLException { - insertRawRecordsInSingleQuery(insertQueryComponent, recordQueryComponent, jdbcDatabase, records, UUID::randomUUID, false); - } - - @VisibleForTesting - static void insertRawRecordsInSingleQuery(final String insertQueryComponent, - final String recordQueryComponent, - final JdbcDatabase jdbcDatabase, - final List records, - final Supplier uuidSupplier, - final boolean sem) - throws SQLException { - if (records.isEmpty()) { - return; - } - - jdbcDatabase.execute(connection -> { - - // Strategy: We want to use PreparedStatement because it handles binding values to the SQL query - // (e.g. handling formatting timestamps). A PreparedStatement statement is created by supplying the - // full SQL string at creation time. Then subsequently specifying which values are bound to the - // string. Thus there will be two loops below. - // 1) Loop over records to build the full string. - // 2) Loop over the records and bind the appropriate values to the string. - // We also partition the query to run on 10k records at a time, since some DBs set a max limit on - // how many records can be inserted at once - // TODO(sherif) this should use a smarter, destination-aware partitioning scheme instead of 10k by - // default - for (final List partition : Iterables.partition(records, 10_000)) { - final StringBuilder sql = new StringBuilder(insertQueryComponent); - partition.forEach(r -> sql.append(recordQueryComponent)); - final String s = sql.toString(); - final String s1 = s.substring(0, s.length() - 2) + (sem ? ";" : ""); - - try (final PreparedStatement statement = connection.prepareStatement(s1)) { - // second loop: bind values to the SQL string. - // 1-indexed - int i = 1; - for (final PartialAirbyteMessage message : partition) { - // Airbyte Raw ID - statement.setString(i, uuidSupplier.get().toString()); - i++; - - // Message Data - statement.setString(i, message.getSerialized()); - i++; - - // Extracted At - statement.setTimestamp(i, Timestamp.from(Instant.ofEpochMilli(message.getRecord().getEmittedAt()))); - i++; - - if (TypingAndDedupingFlag.isDestinationV2()) { - // Loaded At - statement.setTimestamp(i, null); - i++; - } - } - - statement.execute(); - } - } - }); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.java deleted file mode 100644 index 35b8380f70bb..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import java.time.Instant; - -/** - * Write configuration POJO (plain old java object) for all destinations extending - * {@link AbstractJdbcDestination}. 
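For reference, the two-loop PreparedStatement strategy that `insertRawRecordsInSingleQuery` implements reduces to the following self-contained sketch; the table name, column names, and the `RawRecord` stand-in for `PartialAirbyteMessage` are illustrative:

```java
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.time.Instant;
import java.util.List;
import java.util.UUID;

final class MultiRowInsertSketch {

  // Stand-in for PartialAirbyteMessage: just the serialized JSON and emitted-at.
  record RawRecord(String serializedJson, long emittedAtMillis) {}

  static void insert(final Connection connection, final List<RawRecord> records) throws SQLException {
    if (records.isEmpty()) {
      return;
    }
    // Loop 1: build the full SQL string, one "(?, ?::jsonb, ?)" group per record.
    // (The real helper additionally partitions records into 10,000-row chunks,
    // since some databases cap how many rows a single INSERT may carry.)
    final StringBuilder sql =
        new StringBuilder("INSERT INTO public.users (_airbyte_ab_id, _airbyte_data, _airbyte_emitted_at) VALUES ");
    for (int r = 0; r < records.size(); r++) {
      sql.append(r == 0 ? "(?, ?::jsonb, ?)" : ", (?, ?::jsonb, ?)");
    }
    sql.append(';');

    try (PreparedStatement statement = connection.prepareStatement(sql.toString())) {
      // Loop 2: bind values to the placeholders; JDBC parameter indices are 1-based.
      int i = 1;
      for (final RawRecord record : records) {
        statement.setString(i++, UUID.randomUUID().toString()); // Airbyte raw id
        statement.setString(i++, record.serializedJson()); // message data
        statement.setTimestamp(i++, Timestamp.from(Instant.ofEpochMilli(record.emittedAtMillis()))); // emitted at
      }
      statement.execute();
    }
  }

}
```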
- */ -public class WriteConfig { - - private final String streamName; - private final String namespace; - private final String outputSchemaName; - private final String tmpTableName; - private final String outputTableName; - private final DestinationSyncMode syncMode; - private final Instant writeDatetime; - - public WriteConfig(final String streamName, - final String namespace, - final String outputSchemaName, - final String tmpTableName, - final String outputTableName, - final DestinationSyncMode syncMode) { - this(streamName, namespace, outputSchemaName, tmpTableName, outputTableName, syncMode, Instant.now()); - } - - public WriteConfig(final String streamName, - final String namespace, - final String outputSchemaName, - final String tmpTableName, - final String outputTableName, - final DestinationSyncMode syncMode, - final Instant writeDatetime) { - this.streamName = streamName; - this.namespace = namespace; - this.outputSchemaName = outputSchemaName; - this.tmpTableName = tmpTableName; - this.outputTableName = outputTableName; - this.syncMode = syncMode; - this.writeDatetime = writeDatetime; - } - - public String getStreamName() { - return streamName; - } - - /** - * This is used in {@link JdbcBufferedConsumerFactory} to verify that a record comes from the - * expected stream. - * - * @return the stream's namespace - */ - public String getNamespace() { - return namespace; - } - - public String getTmpTableName() { - return tmpTableName; - } - - public String getOutputSchemaName() { - return outputSchemaName; - } - - public String getOutputTableName() { - return outputTableName; - } - - public DestinationSyncMode getSyncMode() { - return syncMode; - } - - public Instant getWriteDatetime() { - return writeDatetime; - } - - @Override - public String toString() { - return "WriteConfig{" + - "streamName=" + streamName + - ", namespace=" + namespace + - ", outputSchemaName=" + outputSchemaName + - ", tmpTableName=" + tmpTableName + - ", outputTableName=" + outputTableName + - ", syncMode=" + syncMode + - '}'; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.java deleted file mode 100644 index cd76b6d0ca0e..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.cdk.integrations.destination.jdbc.copy; - -import static io.airbyte.cdk.integrations.destination.jdbc.constants.GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES; - -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; -import io.airbyte.cdk.integrations.destination.StandardNameTransformer; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.CheckAndRemoveRecordWriter; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnCloseFunction; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnStartFunction; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.RecordWriter; -import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations; -import io.airbyte.cdk.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.AirbyteRecordMessage; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.function.Consumer; -import javax.sql.DataSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class CopyConsumerFactory { - - private static final Logger LOGGER = LoggerFactory.getLogger(CopyConsumerFactory.class); - - public static AirbyteMessageConsumer create(final Consumer outputRecordCollector, - final DataSource dataSource, - final JdbcDatabase database, - final SqlOperations sqlOperations, - final StandardNameTransformer namingResolver, - final T config, - final ConfiguredAirbyteCatalog catalog, - final StreamCopierFactory streamCopierFactory, - final String defaultSchema) { - final Map pairToCopier = createWriteConfigs( - namingResolver, - config, - catalog, - streamCopierFactory, - defaultSchema, - database, - sqlOperations); - - final Map pairToIgnoredRecordCount = new HashMap<>(); - return new BufferedStreamConsumer( - outputRecordCollector, - onStartFunction(pairToIgnoredRecordCount), - new InMemoryRecordBufferingStrategy( - recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount), - removeStagingFilePrinter(pairToCopier), - DEFAULT_MAX_BATCH_SIZE_BYTES), - onCloseFunction(pairToCopier, database, sqlOperations, pairToIgnoredRecordCount, dataSource), - catalog, - sqlOperations::isValidData); - } - - private static Map createWriteConfigs(final StandardNameTransformer namingResolver, - final T config, - final ConfiguredAirbyteCatalog catalog, - final StreamCopierFactory streamCopierFactory, - final String defaultSchema, - final JdbcDatabase database, - final SqlOperations sqlOperations) { - final Map pairToCopier = new HashMap<>(); - final String stagingFolder = UUID.randomUUID().toString(); - for (final var configuredStream : catalog.getStreams()) { - final var stream = configuredStream.getStream(); - final var pair = AirbyteStreamNameNamespacePair.fromAirbyteStream(stream); - final var copier = streamCopierFactory.create(defaultSchema, config, stagingFolder, configuredStream, namingResolver, database, sqlOperations); - - pairToCopier.put(pair, copier); - } - - return pairToCopier; - } - - private static 
OnStartFunction onStartFunction(final Map pairToIgnoredRecordCount) { - return pairToIgnoredRecordCount::clear; - } - - private static RecordWriter recordWriterFunction(final Map pairToCopier, - final SqlOperations sqlOperations, - final Map pairToIgnoredRecordCount) { - return (final AirbyteStreamNameNamespacePair pair, final List records) -> { - final var fileName = pairToCopier.get(pair).prepareStagingFile(); - for (final AirbyteRecordMessage recordMessage : records) { - final var id = UUID.randomUUID(); - if (sqlOperations.isValidData(recordMessage.getData())) { - // TODO Truncate json data instead of throwing whole record away? - // or should we upload it into a special rejected record folder in s3 instead? - pairToCopier.get(pair).write(id, recordMessage, fileName); - } else { - pairToIgnoredRecordCount.put(pair, pairToIgnoredRecordCount.getOrDefault(pair, 0L) + 1L); - } - } - }; - } - - private static CheckAndRemoveRecordWriter removeStagingFilePrinter(final Map pairToCopier) { - return (final AirbyteStreamNameNamespacePair pair, final String stagingFileName) -> { - final String currentFileName = pairToCopier.get(pair).getCurrentFile(); - if (stagingFileName != null && currentFileName != null && !stagingFileName.equals(currentFileName)) { - pairToCopier.get(pair).closeNonCurrentStagingFileWriters(); - } - return currentFileName; - }; - } - - private static OnCloseFunction onCloseFunction(final Map pairToCopier, - final JdbcDatabase database, - final SqlOperations sqlOperations, - final Map pairToIgnoredRecordCount, - final DataSource dataSource) { - return (hasFailed, streamSyncSummaries) -> { - pairToIgnoredRecordCount - .forEach((pair, count) -> LOGGER.warn("A total of {} record(s) of data from stream {} were invalid and were ignored.", count, pair)); - closeAsOneTransaction(pairToCopier, hasFailed, database, sqlOperations, dataSource); - }; - } - - private static void closeAsOneTransaction(final Map pairToCopier, - boolean hasFailed, - final JdbcDatabase db, - final SqlOperations sqlOperations, - final DataSource dataSource) - throws Exception { - Exception firstException = null; - final List streamCopiers = new ArrayList<>(pairToCopier.values()); - try { - final List queries = new ArrayList<>(); - for (final var copier : streamCopiers) { - try { - copier.closeStagingUploader(hasFailed); - if (!hasFailed) { - copier.createDestinationSchema(); - copier.createTemporaryTable(); - copier.copyStagingFileToTemporaryTable(); - final var destTableName = copier.createDestinationTable(); - final var mergeQuery = copier.generateMergeStatement(destTableName); - queries.add(mergeQuery); - } - } catch (final Exception e) { - final String message = String.format("Failed to finalize copy to temp table due to: %s", e); - LOGGER.error(message); - hasFailed = true; - if (firstException == null) { - firstException = e; - } - } - } - if (!hasFailed) { - sqlOperations.executeTransaction(db, queries); - } - } finally { - for (final var copier : streamCopiers) { - copier.removeFileAndDropTmpTable(); - } - - DataSourceFactory.close(dataSource); - } - if (firstException != null) { - throw firstException; - } - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.java deleted file mode 100644 index a5d36f65dcd8..000000000000 --- 
a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc.copy; - -import static io.airbyte.cdk.integrations.base.errors.messages.ErrorMessage.getErrorMessage; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.BaseConnector; -import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility; -import io.airbyte.cdk.integrations.base.Destination; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.StandardNameTransformer; -import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; -import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations; -import io.airbyte.commons.exceptions.ConnectionErrorException; -import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; -import javax.sql.DataSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class CopyDestination extends BaseConnector implements Destination { - - private static final Logger LOGGER = LoggerFactory.getLogger(CopyDestination.class); - - /** - * The default database schema field in the destination config is "schema". To change it, pass the - * field name to the constructor. - */ - private String schemaFieldName = "schema"; - - public CopyDestination() {} - - public CopyDestination(final String schemaFieldName) { - this.schemaFieldName = schemaFieldName; - } - - /** - * A self contained method for writing a file to the persistence for testing. This method should try - * to clean up after itself by deleting the file it creates. - */ - public abstract void checkPersistence(JsonNode config) throws Exception; - - public abstract StandardNameTransformer getNameTransformer(); - - public abstract DataSource getDataSource(JsonNode config); - - public abstract JdbcDatabase getDatabase(DataSource dataSource); - - public abstract SqlOperations getSqlOperations(); - - @Override - public AirbyteConnectionStatus check(final JsonNode config) { - try { - checkPersistence(config); - } catch (final Exception e) { - LOGGER.error("Exception attempting to access the staging persistence: ", e); - return new AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage("Could not connect to the staging persistence with the provided configuration. 
\n" + e.getMessage()); - } - - final DataSource dataSource = getDataSource(config); - - try { - final JdbcDatabase database = getDatabase(dataSource); - final var nameTransformer = getNameTransformer(); - final var outputSchema = nameTransformer.convertStreamName(config.get(schemaFieldName).asText()); - performCreateInsertTestOnDestination(outputSchema, database, nameTransformer); - - return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); - } catch (final ConnectionErrorException ex) { - LOGGER.info("Exception while checking connection: ", ex); - final String message = getErrorMessage(ex.getStateCode(), ex.getErrorCode(), ex.getExceptionMessage(), ex); - AirbyteTraceMessageUtility.emitConfigErrorTrace(ex, message); - return new AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage(message); - } catch (final Exception e) { - LOGGER.error("Exception attempting to connect to the warehouse: ", e); - return new AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage("Could not connect to the warehouse with the provided configuration. \n" + e.getMessage()); - } finally { - try { - DataSourceFactory.close(dataSource); - } catch (final Exception e) { - LOGGER.warn("Unable to close data source.", e); - } - } - } - - protected void performCreateInsertTestOnDestination(final String outputSchema, - final JdbcDatabase database, - final NamingConventionTransformer nameTransformer) - throws Exception { - AbstractJdbcDestination.attemptTableOperations(outputSchema, database, nameTransformer, getSqlOperations(), true); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.java deleted file mode 100644 index 6ba5d77e2962..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.jdbc.copy; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.base.Preconditions; -import io.airbyte.cdk.integrations.BaseConnector; -import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; -import io.airbyte.cdk.integrations.base.Destination; -import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer; -import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.function.Consumer; -import java.util.function.Function; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Multiple configs may allow you to sync data to the destination in multiple ways. - * - * One primary example is that the default behavior for some DB-based destinations may use - * INSERT-based destinations while (given additional credentials) it may be able to sync data using - * a file copied to a staging location. 
- * - * This class exists to make it easy to define a destination in terms of multiple other destination - * implementations, switching between them based on the config provided. - */ -public class SwitchingDestination<T extends Enum<T>> extends BaseConnector implements Destination { - - private static final Logger LOGGER = LoggerFactory.getLogger(SwitchingDestination.class); - - private final Function<JsonNode, T> configToType; - private final Map<T, Destination> typeToDestination; - - public SwitchingDestination(final Class<T> enumClass, final Function<JsonNode, T> configToType, final Map<T, Destination> typeToDestination) { - final Set<T> allEnumConstants = new HashSet<>(Arrays.asList(enumClass.getEnumConstants())); - final Set<T> supportedEnumConstants = typeToDestination.keySet(); - - // check that it isn't possible for configToType to produce something we can't handle - Preconditions.checkArgument(allEnumConstants.equals(supportedEnumConstants)); - - this.configToType = configToType; - this.typeToDestination = typeToDestination; - } - - @Override - public AirbyteConnectionStatus check(final JsonNode config) throws Exception { - final T destinationType = configToType.apply(config); - LOGGER.info("Using destination type: " + destinationType.name()); - return typeToDestination.get(destinationType).check(config); - } - - @Override - public AirbyteMessageConsumer getConsumer(final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final Consumer<AirbyteMessage> outputRecordCollector) - throws Exception { - final T destinationType = configToType.apply(config); - LOGGER.info("Using destination type: " + destinationType.name()); - return typeToDestination.get(destinationType).getConsumer(config, catalog, outputRecordCollector); - } - - @Override - public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final Consumer<AirbyteMessage> outputRecordCollector) - throws Exception { - final T destinationType = configToType.apply(config); - LOGGER.info("Using destination type: " + destinationType.name()); - return typeToDestination.get(destinationType).getSerializedMessageConsumer(config, catalog, outputRecordCollector); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java deleted file mode 100644 index aa3c5450338f..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping; - -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_RAW_ID; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.V2_FINAL_TABLE_METADATA_COLUMNS; -import static java.util.stream.Collectors.toMap; -import static org.jooq.impl.DSL.exists; -import static org.jooq.impl.DSL.field; -import static org.jooq.impl.DSL.name; -import static org.jooq.impl.DSL.quotedName; -import static org.jooq.impl.DSL.selectOne; -import static org.jooq.impl.DSL.table; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.destination.jdbc.ColumnDefinition; -import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; -import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil; -import io.airbyte.commons.concurrency.CompletableFutures; -import io.airbyte.commons.exceptions.SQLRuntimeException; -import io.airbyte.commons.functional.Either; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStatus; -import io.airbyte.integrations.base.destination.typing_deduping.InitialRawTableStatus; -import io.airbyte.integrations.base.destination.typing_deduping.Sql; -import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.base.destination.typing_deduping.Struct; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.time.Instant; -import java.time.OffsetDateTime; -import java.time.temporal.ChronoUnit; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.UUID; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.stream.Stream; -import lombok.extern.slf4j.Slf4j; -import org.jetbrains.annotations.NotNull; -import org.jooq.Condition; -import org.jooq.DSLContext; -import org.jooq.InsertValuesStep4; -import org.jooq.Record; -import org.jooq.SQLDialect; -import org.jooq.conf.ParamType; -import org.jooq.impl.DSL; -import org.jooq.impl.SQLDataType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Slf4j -public abstract class JdbcDestinationHandler implements DestinationHandler { - - private static final Logger LOGGER = LoggerFactory.getLogger(JdbcDestinationHandler.class); - private static final String DESTINATION_STATE_TABLE_NAME = "_airbyte_destination_state"; - private static final String DESTINATION_STATE_TABLE_COLUMN_NAME = "name"; - private static final String DESTINATION_STATE_TABLE_COLUMN_NAMESPACE = "namespace"; - private static final String DESTINATION_STATE_TABLE_COLUMN_STATE = "destination_state"; - private 
static final String DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT = "updated_at"; - - protected final String databaseName; - protected final JdbcDatabase jdbcDatabase; - protected final String rawTableSchemaName; - private final SQLDialect dialect; - - public JdbcDestinationHandler(final String databaseName, - final JdbcDatabase jdbcDatabase, - final String rawTableSchemaName, - final SQLDialect dialect) { - this.databaseName = databaseName; - this.jdbcDatabase = jdbcDatabase; - this.rawTableSchemaName = rawTableSchemaName; - this.dialect = dialect; - } - - protected DSLContext getDslContext() { - return DSL.using(dialect); - } - - private Optional findExistingTable(final StreamId id) throws Exception { - return findExistingTable(jdbcDatabase, databaseName, id.getFinalNamespace(), id.getFinalName()); - } - - private boolean isFinalTableEmpty(final StreamId id) throws Exception { - return !jdbcDatabase.queryBoolean( - getDslContext().select( - field(exists( - selectOne() - .from(name(id.getFinalNamespace(), id.getFinalName())) - .limit(1)))) - .getSQL(ParamType.INLINED)); - } - - private InitialRawTableStatus getInitialRawTableState(final StreamId id) throws Exception { - boolean tableExists = jdbcDatabase.executeMetadataQuery(dbmetadata -> { - LOGGER.info("Retrieving table from Db metadata: {} {} {}", databaseName, id.getRawNamespace(), id.getRawName()); - try (final ResultSet table = dbmetadata.getTables(databaseName, id.getRawNamespace(), id.getRawName(), null)) { - return table.next(); - } catch (SQLException e) { - LOGGER.error("Failed to retrieve table info from metadata", e); - throw new SQLRuntimeException(e); - } - }); - if (!tableExists) { - // There's no raw table at all. Therefore there are no unprocessed raw records, and this sync - // should not filter raw records by timestamp. - return new InitialRawTableStatus(false, false, Optional.empty()); - } - // And use two explicit queries because COALESCE might not short-circuit evaluation. - // This first query tries to find the oldest raw record with loaded_at = NULL. - // Unsafe query requires us to explicitly close the Stream, which is inconvenient, - // but it's also the only method in the JdbcDatabase interface to return non-string/int types - try (final Stream timestampStream = jdbcDatabase.unsafeQuery( - conn -> conn.prepareStatement( - getDslContext().select(field("MIN(_airbyte_extracted_at)").as("min_timestamp")) - .from(name(id.getRawNamespace(), id.getRawName())) - .where(DSL.condition("_airbyte_loaded_at IS NULL")) - .getSQL()), - record -> record.getTimestamp("min_timestamp"))) { - // Filter for nonNull values in case the query returned NULL (i.e. no unloaded records). - final Optional minUnloadedTimestamp = timestampStream.filter(Objects::nonNull).findFirst(); - if (minUnloadedTimestamp.isPresent()) { - // Decrement by 1 second since timestamp precision varies between databases. - final Optional ts = minUnloadedTimestamp - .map(Timestamp::toInstant) - .map(i -> i.minus(1, ChronoUnit.SECONDS)); - return new InitialRawTableStatus(true, true, ts); - } - } - // If there are no unloaded raw records, then we can safely skip all existing raw records. - // This second query just finds the newest raw record. 
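Condensed, the three-way decision `getInitialRawTableState` makes can be sketched as follows; the `airbyte_internal.users_raw` table name is illustrative, and the real queries are built through jOOQ rather than as literals:

```java
import java.sql.Timestamp;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Optional;

final class RawTableStateSketch {

  // Query 1: oldest raw record that typing/deduping has not processed yet.
  static final String MIN_UNLOADED =
      "SELECT MIN(_airbyte_extracted_at) AS min_timestamp FROM airbyte_internal.users_raw "
          + "WHERE _airbyte_loaded_at IS NULL";

  // Query 2: newest raw record overall (only consulted when query 1 returns NULL).
  static final String MAX_OVERALL =
      "SELECT MAX(_airbyte_extracted_at) AS min_timestamp FROM airbyte_internal.users_raw";

  static Optional<Instant> startTimestamp(final Optional<Timestamp> minUnloaded,
                                          final Optional<Timestamp> maxOverall) {
    if (minUnloaded.isPresent()) {
      // Unprocessed records exist: back off one second because timestamp
      // precision varies between databases, then process everything newer.
      return minUnloaded.map(Timestamp::toInstant).map(t -> t.minus(1, ChronoUnit.SECONDS));
    }
    // No unprocessed records: this sync can skip everything up to the newest raw record.
    return maxOverall.map(Timestamp::toInstant);
  }

}
```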
- try (final Stream timestampStream = jdbcDatabase.unsafeQuery( - conn -> conn.prepareStatement( - getDslContext().select(field("MAX(_airbyte_extracted_at)").as("min_timestamp")) - .from(name(id.getRawNamespace(), id.getRawName())) - .getSQL()), - record -> record.getTimestamp("min_timestamp"))) { - // Filter for nonNull values in case the query returned NULL (i.e. no raw records at all). - final Optional minUnloadedTimestamp = timestampStream.filter(Objects::nonNull).findFirst(); - return new InitialRawTableStatus(true, false, minUnloadedTimestamp.map(Timestamp::toInstant)); - } - } - - @Override - public void execute(final Sql sql) throws Exception { - final List> transactions = sql.transactions; - final UUID queryId = UUID.randomUUID(); - for (final List transaction : transactions) { - final UUID transactionId = UUID.randomUUID(); - LOGGER.info("Executing sql {}-{}: {}", queryId, transactionId, String.join("\n", transaction)); - final long startTime = System.currentTimeMillis(); - - try { - jdbcDatabase.executeWithinTransaction(transaction); - } catch (final SQLException e) { - LOGGER.error("Sql {}-{} failed", queryId, transactionId, e); - throw e; - } - - LOGGER.info("Sql {}-{} completed in {} ms", queryId, transactionId, System.currentTimeMillis() - startTime); - } - } - - @Override - public List> gatherInitialState(List streamConfigs) throws Exception { - // Use stream n/ns pair because we don't want to build the full StreamId here - CompletableFuture> destinationStatesFuture = CompletableFuture.supplyAsync(() -> { - try { - return getAllDestinationStates(); - } catch (SQLException e) { - throw new RuntimeException(e); - } - }); - - final List>> initialStates = streamConfigs.stream() - .map(streamConfig -> retrieveState(destinationStatesFuture, streamConfig)) - .toList(); - final List>> states = - CompletableFutures.allOf(initialStates).toCompletableFuture().join(); - return ConnectorExceptionUtil.getResultsOrLogAndThrowFirst("Failed to retrieve initial state", states); - } - - @NotNull - protected Map getAllDestinationStates() throws SQLException { - // Guarantee the table exists. - jdbcDatabase.execute( - getDslContext().createTableIfNotExists(quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) - .column(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), SQLDataType.VARCHAR) - .column(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), SQLDataType.VARCHAR) - // Just use a string type, even if the destination has a json type. - // We're never going to query this column in a fancy way - all our processing can happen - // client-side. - .column(quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), SQLDataType.VARCHAR) - // Add an updated_at field. We don't actually need it yet, but it can't hurt! - .column(quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), SQLDataType.TIMESTAMPWITHTIMEZONE) - .getSQL(ParamType.INLINED)); - // Fetch all records from it. We _could_ filter down to just our streams... but meh. This is small - // data. - return jdbcDatabase.queryJsons( - getDslContext().select( - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)), - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE)), - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE))).from(quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) - .getSQL()) - .stream() - .peek(recordJson -> { - // Forcibly downcase all key names. - // This is to handle any destinations that upcase the column names. - // For example - Snowflake with QUOTED_IDENTIFIERS_IGNORE_CASE=TRUE. 
- final ObjectNode record = (ObjectNode) recordJson; - final Map newFields = new HashMap<>(); - for (Iterator it = record.fieldNames(); it.hasNext();) { - String fieldName = it.next(); - // We can't directly call record.set here, because that will raise a - // ConcurrentModificationException on the fieldnames iterator. - // Instead, build up a map of new fields and set them all at once. - newFields.put(fieldName.toLowerCase(), record.get(fieldName)); - } - - record.setAll(newFields); - }).collect(toMap( - record -> { - final JsonNode nameNode = record.get(DESTINATION_STATE_TABLE_COLUMN_NAME); - final JsonNode namespaceNode = record.get(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE); - return new AirbyteStreamNameNamespacePair( - nameNode != null ? nameNode.asText() : null, - namespaceNode != null ? namespaceNode.asText() : null); - }, - record -> { - final JsonNode stateNode = record.get(DESTINATION_STATE_TABLE_COLUMN_STATE); - JsonNode state = stateNode != null ? Jsons.deserialize(stateNode.asText()) : Jsons.emptyObject(); - return toDestinationState(state); - })); - } - - private CompletionStage> retrieveState(final CompletableFuture> destinationStatesFuture, - final StreamConfig streamConfig) { - return destinationStatesFuture.thenApply(destinationStates -> { - try { - final Optional finalTableDefinition = findExistingTable(streamConfig.getId()); - final boolean isSchemaMismatch; - final boolean isFinalTableEmpty; - if (finalTableDefinition.isPresent()) { - isSchemaMismatch = !existingSchemaMatchesStreamConfig(streamConfig, finalTableDefinition.get()); - isFinalTableEmpty = isFinalTableEmpty(streamConfig.getId()); - } else { - // If the final table doesn't exist, then by definition it doesn't have a schema mismatch and has no - // records. - isSchemaMismatch = false; - isFinalTableEmpty = true; - } - final InitialRawTableStatus initialRawTableState = getInitialRawTableState(streamConfig.getId()); - DestinationState destinationState = destinationStates.getOrDefault(streamConfig.getId().asPair(), toDestinationState(Jsons.emptyObject())); - return new DestinationInitialStatus<>(streamConfig, finalTableDefinition.isPresent(), initialRawTableState, - isSchemaMismatch, isFinalTableEmpty, destinationState); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - public static Optional findExistingTable(final JdbcDatabase jdbcDatabase, - final String databaseName, - final String schemaName, - final String tableName) - throws SQLException { - final LinkedHashMap retrievedColumnDefns = jdbcDatabase.executeMetadataQuery(dbMetadata -> { - - // TODO: normalize namespace and finalName strings to quoted-lowercase (as needed. 
Snowflake - // requires uppercase) - final LinkedHashMap columnDefinitions = new LinkedHashMap<>(); - LOGGER.info("Retrieving existing columns for {}.{}.{}", databaseName, schemaName, tableName); - try (final ResultSet columns = dbMetadata.getColumns(databaseName, schemaName, tableName, null)) { - while (columns.next()) { - final String columnName = columns.getString("COLUMN_NAME"); - final String typeName = columns.getString("TYPE_NAME"); - final int columnSize = columns.getInt("COLUMN_SIZE"); - final String isNullable = columns.getString("IS_NULLABLE"); - columnDefinitions.put(columnName, new ColumnDefinition(columnName, typeName, columnSize, fromIsNullableIsoString(isNullable))); - } - } catch (final SQLException e) { - LOGGER.error("Failed to retrieve column info for {}.{}.{}", databaseName, schemaName, tableName, e); - throw new SQLRuntimeException(e); - } - return columnDefinitions; - }); - // Guard to fail fast - if (retrievedColumnDefns.isEmpty()) { - return Optional.empty(); - } - - return Optional.of(new TableDefinition(retrievedColumnDefns)); - } - - public static boolean fromIsNullableIsoString(final String isNullable) { - return "YES".equalsIgnoreCase(isNullable); - } - - private boolean isAirbyteRawIdColumnMatch(final TableDefinition existingTable) { - return existingTable.columns().containsKey(COLUMN_NAME_AB_RAW_ID) && - toJdbcTypeName(AirbyteProtocolType.STRING).equals(existingTable.columns().get(COLUMN_NAME_AB_RAW_ID).type()); - } - - private boolean isAirbyteExtractedAtColumnMatch(final TableDefinition existingTable) { - return existingTable.columns().containsKey(COLUMN_NAME_AB_EXTRACTED_AT) && - toJdbcTypeName(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE).equals(existingTable.columns().get(COLUMN_NAME_AB_EXTRACTED_AT).type()); - } - - private boolean isAirbyteMetaColumnMatch(final TableDefinition existingTable) { - return existingTable.columns().containsKey(COLUMN_NAME_AB_META) && - toJdbcTypeName(new Struct(new LinkedHashMap<>())).equals(existingTable.columns().get(COLUMN_NAME_AB_META).type()); - } - - protected boolean existingSchemaMatchesStreamConfig(final StreamConfig stream, final TableDefinition existingTable) { - // Check that the columns match, with special handling for the metadata columns. - if (!isAirbyteRawIdColumnMatch(existingTable) || - !isAirbyteExtractedAtColumnMatch(existingTable) || - !isAirbyteMetaColumnMatch(existingTable)) { - // Missing AB meta columns from final table, we need them to do proper T+D so trigger soft-reset - return false; - } - final LinkedHashMap intendedColumns = stream.getColumns().entrySet().stream() - .collect(LinkedHashMap::new, - (map, column) -> map.put(column.getKey().getName(), toJdbcTypeName(column.getValue())), - LinkedHashMap::putAll); - - // Filter out Meta columns since they don't exist in stream config. 
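Each destination supplies the `toJdbcTypeName` mapping that the column-match helpers above compare against. A sketch for a Postgres-like destination, assuming the lowercase TYPE_NAME strings its JDBC driver reports (verify against the actual driver); the real method is a protected instance override:

```java
import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType;
import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType;

final class PostgresTypeNameSketch {

  // Maps Airbyte types to the TYPE_NAME strings returned by DatabaseMetaData#getColumns.
  static String toJdbcTypeName(final AirbyteType airbyteType) {
    if (airbyteType instanceof final AirbyteProtocolType protocolType) {
      return switch (protocolType) {
        case STRING -> "varchar";
        case NUMBER -> "numeric";
        case INTEGER -> "int8";
        case BOOLEAN -> "bool";
        case TIMESTAMP_WITH_TIMEZONE -> "timestamptz";
        case TIMESTAMP_WITHOUT_TIMEZONE -> "timestamp";
        case TIME_WITH_TIMEZONE -> "timetz";
        case TIME_WITHOUT_TIMEZONE -> "time";
        case DATE -> "date";
        case UNKNOWN -> "jsonb";
      };
    }
    return "jsonb"; // structs, arrays, and unions all land in JSONB in this sketch
  }

}
```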
- final LinkedHashMap actualColumns = existingTable.columns().entrySet().stream() - .filter(column -> V2_FINAL_TABLE_METADATA_COLUMNS.stream() - .noneMatch(airbyteColumnName -> airbyteColumnName.equals(column.getKey()))) - .collect(LinkedHashMap::new, - (map, column) -> map.put(column.getKey(), column.getValue().type()), - LinkedHashMap::putAll); - - return actualColumns.equals(intendedColumns); - } - - @Override - public void commitDestinationStates(final Map destinationStates) throws Exception { - if (destinationStates.isEmpty()) { - return; - } - - // Delete all state records where the stream name+namespace match one of our states - String deleteStates = getDslContext().deleteFrom(table(quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME))) - .where(destinationStates.keySet().stream() - .map(streamId -> field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)).eq(streamId.getOriginalName()) - .and(field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE)).eq(streamId.getOriginalNamespace()))) - .reduce( - DSL.falseCondition(), - Condition::or)) - .getSQL(ParamType.INLINED); - - // Reinsert all of our states - @NotNull - InsertValuesStep4 insertStatesStep = - getDslContext().insertInto(table(quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME))) - .columns( - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), String.class), - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), String.class), - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), String.class), - // This field is a timestamptz, but it's easier to just insert a string - // and assume the destination can cast it appropriately. - // Destination-specific timestamp syntax is weird and annoying. - field(quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), String.class)); - for (Map.Entry destinationState : destinationStates.entrySet()) { - final StreamId streamId = destinationState.getKey(); - final String stateJson = Jsons.serialize(destinationState.getValue()); - insertStatesStep = - insertStatesStep.values(streamId.getOriginalName(), streamId.getOriginalNamespace(), stateJson, OffsetDateTime.now().toString()); - } - String insertStates = insertStatesStep.getSQL(ParamType.INLINED); - - jdbcDatabase.executeWithinTransaction(List.of(deleteStates, insertStates)); - } - - /** - * Convert to the TYPE_NAME retrieved from {@link java.sql.DatabaseMetaData#getColumns} - * - * @param airbyteType - * @return - */ - protected abstract String toJdbcTypeName(final AirbyteType airbyteType); - - protected abstract DestinationState toDestinationState(final JsonNode json); - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java deleted file mode 100644 index a3da65e527dc..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping; - -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_ID; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_RAW_ID; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT; -import static io.airbyte.integrations.base.destination.typing_deduping.Sql.transactionally; -import static java.util.stream.Collectors.toList; -import static org.jooq.impl.DSL.alterTable; -import static org.jooq.impl.DSL.asterisk; -import static org.jooq.impl.DSL.cast; -import static org.jooq.impl.DSL.dropTableIfExists; -import static org.jooq.impl.DSL.field; -import static org.jooq.impl.DSL.inline; -import static org.jooq.impl.DSL.name; -import static org.jooq.impl.DSL.noCondition; -import static org.jooq.impl.DSL.quotedName; -import static org.jooq.impl.DSL.select; -import static org.jooq.impl.DSL.table; -import static org.jooq.impl.DSL.update; -import static org.jooq.impl.DSL.with; - -import com.google.common.annotations.VisibleForTesting; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; -import io.airbyte.integrations.base.destination.typing_deduping.Array; -import io.airbyte.integrations.base.destination.typing_deduping.ColumnId; -import io.airbyte.integrations.base.destination.typing_deduping.Sql; -import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator; -import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.base.destination.typing_deduping.Struct; -import io.airbyte.integrations.base.destination.typing_deduping.Union; -import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import java.sql.Timestamp; -import java.time.Instant; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Stream; -import org.jooq.CommonTableExpression; -import org.jooq.Condition; -import org.jooq.CreateSchemaFinalStep; -import org.jooq.CreateTableColumnStep; -import org.jooq.DSLContext; -import org.jooq.DataType; -import org.jooq.Field; -import org.jooq.InsertValuesStepN; -import org.jooq.Name; -import org.jooq.Record; -import org.jooq.SQLDialect; -import org.jooq.SelectConditionStep; -import org.jooq.conf.ParamType; -import org.jooq.impl.DSL; -import org.jooq.impl.SQLDataType; - -public abstract class JdbcSqlGenerator implements SqlGenerator { - - protected static final String ROW_NUMBER_COLUMN_NAME = "row_number"; - private static final String TYPING_CTE_ALIAS = "intermediate_data"; - private static final String NUMBERED_ROWS_CTE_ALIAS = "numbered_rows"; - - protected final NamingConventionTransformer namingTransformer; - protected final ColumnId cdcDeletedAtColumn; - - public 
JdbcSqlGenerator(final NamingConventionTransformer namingTransformer) { - this.namingTransformer = namingTransformer; - this.cdcDeletedAtColumn = buildColumnId("_ab_cdc_deleted_at"); - } - - @Override - public StreamId buildStreamId(final String namespace, final String name, final String rawNamespaceOverride) { - return new StreamId( - namingTransformer.getNamespace(namespace), - namingTransformer.convertStreamName(name), - namingTransformer.getNamespace(rawNamespaceOverride), - namingTransformer.convertStreamName(StreamId.concatenateRawTableName(namespace, name)), - namespace, - name); - } - - @Override - public ColumnId buildColumnId(final String name, final String suffix) { - final String nameWithSuffix = name + suffix; - return new ColumnId( - namingTransformer.getIdentifier(nameWithSuffix), - name, - namingTransformer.getIdentifier(nameWithSuffix)); - } - - protected DataType toDialectType(final AirbyteType type) { - if (type instanceof final AirbyteProtocolType airbyteProtocolType) { - return toDialectType(airbyteProtocolType); - } - return switch (type.getTypeName()) { - case Struct.TYPE, UnsupportedOneOf.TYPE -> getStructType(); - case Array.TYPE -> getArrayType(); - // No nested Unions supported so this will definitely not result in infinite recursion. - case Union.TYPE -> toDialectType(((Union) type).chooseType()); - default -> throw new IllegalArgumentException("Unsupported AirbyteType: " + type); - }; - } - - @VisibleForTesting - public DataType toDialectType(final AirbyteProtocolType airbyteProtocolType) { - return switch (airbyteProtocolType) { - // Many destinations default to a very short length (e.g. Redshift defaults to 256). - // Explicitly set 64KiB here. Subclasses may want to override this value. - case STRING -> SQLDataType.VARCHAR(65535); - // We default to precision=38, scale=9 across destinations. - // This is the default numeric parameters for both redshift and bigquery. - case NUMBER -> SQLDataType.DECIMAL(38, 9); - case INTEGER -> SQLDataType.BIGINT; - case BOOLEAN -> SQLDataType.BOOLEAN; - case TIMESTAMP_WITH_TIMEZONE -> SQLDataType.TIMESTAMPWITHTIMEZONE; - case TIMESTAMP_WITHOUT_TIMEZONE -> SQLDataType.TIMESTAMP; - case TIME_WITH_TIMEZONE -> SQLDataType.TIMEWITHTIMEZONE; - case TIME_WITHOUT_TIMEZONE -> SQLDataType.TIME; - case DATE -> SQLDataType.DATE; - case UNKNOWN -> getWidestType(); - }; - } - - protected abstract DataType getStructType(); - - protected abstract DataType getArrayType(); - - @VisibleForTesting - public DataType getTimestampWithTimeZoneType() { - return toDialectType(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE); - } - - protected abstract DataType getWidestType(); - - protected abstract SQLDialect getDialect(); - - /** - * @param columns from the schema to be extracted from _airbyte_data column. Use the destination - * specific syntax to extract data - * @param useExpensiveSaferCasting - * @return a list of jooq fields for the final table insert statement. - */ - protected abstract List> extractRawDataFields(final LinkedHashMap columns, boolean useExpensiveSaferCasting); - - /** - * - * @param columns from the schema to be used for type casting errors and construct _airbyte_meta - * column - * @return - */ - protected abstract Field buildAirbyteMetaColumn(final LinkedHashMap columns); - - /** - * Get the cdc_deleted_at column condition for append_dedup mode by extracting it from _airbyte_data - * column in raw table. 
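The dialect-specific hooks above (`getStructType`, `getArrayType`, `getWidestType`, `getDialect`) are what a concrete generator fills in. A sketch of a hypothetical Postgres-flavored subclass, where the JSONB mapping is an assumption rather than anything this interface mandates:

```java
import io.airbyte.cdk.integrations.destination.NamingConventionTransformer;
import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator;
import org.jooq.DataType;
import org.jooq.SQLDialect;
import org.jooq.impl.DefaultDataType;

// Structs, arrays, and unknown types all land in JSONB, and jOOQ renders
// POSTGRES syntax for every generated statement.
public abstract class ExamplePostgresSqlGenerator extends JdbcSqlGenerator {

  private static final DataType<Object> JSONB_TYPE =
      new DefaultDataType<>(null, Object.class, "jsonb");

  public ExamplePostgresSqlGenerator(final NamingConventionTransformer namingTransformer) {
    super(namingTransformer);
  }

  @Override
  protected DataType<?> getStructType() {
    return JSONB_TYPE;
  }

  @Override
  protected DataType<?> getArrayType() {
    return JSONB_TYPE;
  }

  @Override
  protected DataType<?> getWidestType() {
    return JSONB_TYPE;
  }

  @Override
  protected SQLDialect getDialect() {
    return SQLDialect.POSTGRES;
  }

}
```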
- * - * @return - */ - protected abstract Condition cdcDeletedAtNotNullCondition(); - - /** - * Get the window step function row_number() over (partition by primary_key order by cursor_field) - * as row_number. - * - * @param primaryKey list of primary keys - * @param cursorField cursor field used for ordering - * @return - */ - protected abstract Field getRowNumber(final List primaryKey, final Optional cursorField); - - protected DSLContext getDslContext() { - return DSL.using(getDialect()); - } - - /** - * build jooq fields for final table with customers columns first and then meta columns. - * - * @param columns - * @param metaColumns - * @return - */ - @VisibleForTesting - List> buildFinalTableFields(final LinkedHashMap columns, final Map> metaColumns) { - final List> fields = - metaColumns.entrySet().stream().map(metaColumn -> field(quotedName(metaColumn.getKey()), metaColumn.getValue())).collect(toList()); - final List> dataFields = - columns.entrySet().stream().map(column -> field(quotedName(column.getKey().getName()), toDialectType(column.getValue()))).collect( - toList()); - dataFields.addAll(fields); - return dataFields; - } - - /** - * Use this method to get the final table meta columns with or without _airbyte_meta column. - * - * @param includeMetaColumn - * @return - */ - LinkedHashMap> getFinalTableMetaColumns(final boolean includeMetaColumn) { - final LinkedHashMap> metaColumns = new LinkedHashMap<>(); - metaColumns.put(COLUMN_NAME_AB_RAW_ID, SQLDataType.VARCHAR(36).nullable(false)); - metaColumns.put(COLUMN_NAME_AB_EXTRACTED_AT, getTimestampWithTimeZoneType().nullable(false)); - if (includeMetaColumn) - metaColumns.put(COLUMN_NAME_AB_META, getStructType().nullable(false)); - return metaColumns; - } - - /** - * build jooq fields for raw table with type-casted data columns first and then meta columns without - * _airbyte_meta. - * - * @param columns - * @param metaColumns - * @return - */ - @VisibleForTesting - List> buildRawTableSelectFields(final LinkedHashMap columns, - final Map> metaColumns, - final boolean useExpensiveSaferCasting) { - final List> fields = - metaColumns.entrySet().stream().map(metaColumn -> field(quotedName(metaColumn.getKey()), metaColumn.getValue())).collect(toList()); - // Use originalName with non-sanitized characters when extracting data from _airbyte_data - final List> dataFields = extractRawDataFields(columns, useExpensiveSaferCasting); - dataFields.addAll(fields); - return dataFields; - } - - @VisibleForTesting - Condition rawTableCondition(final DestinationSyncMode syncMode, final boolean isCdcDeletedAtPresent, final Optional minRawTimestamp) { - Condition condition = field(name(COLUMN_NAME_AB_LOADED_AT)).isNull(); - if (syncMode == DestinationSyncMode.APPEND_DEDUP) { - if (isCdcDeletedAtPresent) { - condition = condition.or(cdcDeletedAtNotNullCondition()); - } - } - if (minRawTimestamp.isPresent()) { - condition = condition.and(field(name(COLUMN_NAME_AB_EXTRACTED_AT)).gt(minRawTimestamp.get().toString())); - } - return condition; - } - - @Override - public Sql createSchema(final String schema) { - return Sql.of(createSchemaSql(schema)); - } - - @Override - public Sql createTable(final StreamConfig stream, final String suffix, final boolean force) { - // TODO: Use Naming transformer to sanitize these strings with redshift restrictions. 
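For a concrete picture of what `rawTableCondition` composes: in APPEND_DEDUP mode with a CDC deleted-at column and a known minimum timestamp, the resulting jOOQ condition is roughly the following sketch; the JSONB extraction syntax and the timestamp literal are illustrative:

```java
import static org.jooq.impl.DSL.condition;
import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.name;

import org.jooq.Condition;

final class RawTableConditionSketch {

  // (loaded_at IS NULL OR cdc-deleted) AND extracted_at > min timestamp.
  // The CDC predicate comes from the destination's cdcDeletedAtNotNullCondition().
  static Condition appendDedupFilter() {
    return field(name("_airbyte_loaded_at")).isNull()
        .or(condition("_airbyte_data ->> '_ab_cdc_deleted_at' IS NOT NULL"))
        .and(field(name("_airbyte_extracted_at")).gt("2024-01-01T00:00:00Z"));
  }

}
```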
- final String finalTableIdentifier = stream.getId().getFinalName() + suffix.toLowerCase(); - if (!force) { - return transactionally(Stream.concat( - Stream.of(createTableSql(stream.getId().getFinalNamespace(), finalTableIdentifier, stream.getColumns())), - createIndexSql(stream, suffix).stream()).toList()); - } - return transactionally(Stream.concat( - Stream.of( - dropTableIfExists(quotedName(stream.getId().getFinalNamespace(), finalTableIdentifier)).getSQL(ParamType.INLINED), - createTableSql(stream.getId().getFinalNamespace(), finalTableIdentifier, stream.getColumns())), - createIndexSql(stream, suffix).stream()).toList()); - } - - @Override - public Sql updateTable(final StreamConfig streamConfig, - final String finalSuffix, - final Optional minRawTimestamp, - final boolean useExpensiveSaferCasting) { - - // TODO: Add flag to use merge vs insert/delete - return insertAndDeleteTransaction(streamConfig, finalSuffix, minRawTimestamp, useExpensiveSaferCasting); - - } - - @Override - public Sql overwriteFinalTable(final StreamId stream, final String finalSuffix) { - return transactionally( - dropTableIfExists(name(stream.getFinalNamespace(), stream.getFinalName())).getSQL(ParamType.INLINED), - alterTable(name(stream.getFinalNamespace(), stream.getFinalName() + finalSuffix)) - .renameTo(name(stream.getFinalName())) - .getSQL()); - } - - @Override - public Sql migrateFromV1toV2(final StreamId streamId, final String namespace, final String tableName) { - final Name rawTableName = name(streamId.getRawNamespace(), streamId.getRawName()); - final DSLContext dsl = getDslContext(); - return transactionally( - dsl.createSchemaIfNotExists(streamId.getRawNamespace()).getSQL(), - dsl.dropTableIfExists(rawTableName).getSQL(), - DSL.createTable(rawTableName) - .column(COLUMN_NAME_AB_RAW_ID, SQLDataType.VARCHAR(36).nullable(false)) - .column(COLUMN_NAME_AB_EXTRACTED_AT, getTimestampWithTimeZoneType().nullable(false)) - .column(COLUMN_NAME_AB_LOADED_AT, getTimestampWithTimeZoneType().nullable(true)) - .column(COLUMN_NAME_DATA, getStructType().nullable(false)) - .column(COLUMN_NAME_AB_META, getStructType().nullable(true)) - .as(select( - field(COLUMN_NAME_AB_ID).as(COLUMN_NAME_AB_RAW_ID), - field(COLUMN_NAME_EMITTED_AT).as(COLUMN_NAME_AB_EXTRACTED_AT), - cast(null, getTimestampWithTimeZoneType()).as(COLUMN_NAME_AB_LOADED_AT), - field(COLUMN_NAME_DATA).as(COLUMN_NAME_DATA), - cast(null, getStructType()).as(COLUMN_NAME_AB_META)).from(table(name(namespace, tableName)))) - .getSQL(ParamType.INLINED)); - } - - @Override - public Sql clearLoadedAt(final StreamId streamId) { - return Sql.of(update(table(name(streamId.getRawNamespace(), streamId.getRawName()))) - .set(field(COLUMN_NAME_AB_LOADED_AT), inline((String) null)) - .getSQL()); - } - - @VisibleForTesting - SelectConditionStep selectFromRawTable(final String schemaName, - final String tableName, - final LinkedHashMap columns, - final Map> metaColumns, - final Condition condition, - final boolean useExpensiveSaferCasting) { - final DSLContext dsl = getDslContext(); - return dsl - .select(buildRawTableSelectFields(columns, metaColumns, useExpensiveSaferCasting)) - .select(buildAirbyteMetaColumn(columns)) - .from(table(quotedName(schemaName, tableName))) - .where(condition); - } - - @VisibleForTesting - InsertValuesStepN insertIntoFinalTable(final String schemaName, - final String tableName, - final LinkedHashMap columns, - final Map> metaFields) { - final DSLContext dsl = getDslContext(); - return dsl - .insertInto(table(quotedName(schemaName, tableName))) - 
.columns(buildFinalTableFields(columns, metaFields)); - } - - private Sql insertAndDeleteTransaction(final StreamConfig streamConfig, - final String finalSuffix, - final Optional minRawTimestamp, - final boolean useExpensiveSaferCasting) { - final String finalSchema = streamConfig.getId().getFinalNamespace(); - final String finalTable = streamConfig.getId().getFinalName() + (finalSuffix != null ? finalSuffix.toLowerCase() : ""); - final String rawSchema = streamConfig.getId().getRawNamespace(); - final String rawTable = streamConfig.getId().getRawName(); - - // Poor person's guarantee of ordering of fields by using same source of ordered list of columns to - // generate fields. - final CommonTableExpression rawTableRowsWithCast = name(TYPING_CTE_ALIAS).as( - selectFromRawTable(rawSchema, rawTable, streamConfig.getColumns(), - getFinalTableMetaColumns(false), - rawTableCondition(streamConfig.getDestinationSyncMode(), - streamConfig.getColumns().containsKey(cdcDeletedAtColumn), - minRawTimestamp), - useExpensiveSaferCasting)); - final List> finalTableFields = buildFinalTableFields(streamConfig.getColumns(), getFinalTableMetaColumns(true)); - final Field rowNumber = getRowNumber(streamConfig.getPrimaryKey(), streamConfig.getCursor()); - final CommonTableExpression filteredRows = name(NUMBERED_ROWS_CTE_ALIAS).as( - select(asterisk(), rowNumber).from(rawTableRowsWithCast)); - - // Used for append-dedupe mode. - final String insertStmtWithDedupe = - insertIntoFinalTable(finalSchema, finalTable, streamConfig.getColumns(), getFinalTableMetaColumns(true)) - .select(with(rawTableRowsWithCast) - .with(filteredRows) - .select(finalTableFields) - .from(filteredRows) - .where(field(name(ROW_NUMBER_COLUMN_NAME), Integer.class).eq(1)) // Can refer by CTE.field but no use since we don't strongly type - // them. - ) - .getSQL(ParamType.INLINED); - - // Used for append and overwrite modes. - final String insertStmt = - insertIntoFinalTable(finalSchema, finalTable, streamConfig.getColumns(), getFinalTableMetaColumns(true)) - .select(with(rawTableRowsWithCast) - .select(finalTableFields) - .from(rawTableRowsWithCast)) - .getSQL(ParamType.INLINED); - final String deleteStmt = deleteFromFinalTable(finalSchema, finalTable, streamConfig.getPrimaryKey(), streamConfig.getCursor()); - final String deleteCdcDeletesStmt = - streamConfig.getColumns().containsKey(cdcDeletedAtColumn) ? 
deleteFromFinalTableCdcDeletes(finalSchema, finalTable) : ""; - final String checkpointStmt = checkpointRawTable(rawSchema, rawTable, minRawTimestamp); - - if (streamConfig.getDestinationSyncMode() != DestinationSyncMode.APPEND_DEDUP) { - return transactionally( - insertStmt, - checkpointStmt); - } - - // For append-dedupe - return transactionally( - insertStmtWithDedupe, - deleteStmt, - deleteCdcDeletesStmt, - checkpointStmt); - } - - private String mergeTransaction(final StreamConfig streamConfig, - final String finalSuffix, - final Optional minRawTimestamp, - final boolean useExpensiveSaferCasting) { - - throw new UnsupportedOperationException("Not implemented yet"); - - } - - protected String createSchemaSql(final String namespace) { - final DSLContext dsl = getDslContext(); - final CreateSchemaFinalStep createSchemaSql = dsl.createSchemaIfNotExists(quotedName(namespace)); - return createSchemaSql.getSQL(); - } - - protected String createTableSql(final String namespace, final String tableName, final LinkedHashMap columns) { - final DSLContext dsl = getDslContext(); - final CreateTableColumnStep createTableSql = dsl - .createTable(quotedName(namespace, tableName)) - .columns(buildFinalTableFields(columns, getFinalTableMetaColumns(true))); - return createTableSql.getSQL(); - } - - /** - * Subclasses may override this method to add additional indexes after their CREATE TABLE statement. - * This is useful if the destination's CREATE TABLE statement does not accept an index definition. - */ - protected List createIndexSql(final StreamConfig stream, final String suffix) { - return Collections.emptyList(); - } - - protected String beginTransaction() { - return "BEGIN"; - } - - protected String commitTransaction() { - return "COMMIT"; - } - - private String commitTransactionInternal() { - return commitTransaction() + ";"; - } - - private String deleteFromFinalTable(final String schemaName, - final String tableName, - final List primaryKeys, - final Optional cursor) { - final DSLContext dsl = getDslContext(); - // Unknown type doesn't play well with where .. in (select..) 
- final Field airbyteRawId = field(quotedName(COLUMN_NAME_AB_RAW_ID)); - final Field rowNumber = getRowNumber(primaryKeys, cursor); - return dsl.deleteFrom(table(quotedName(schemaName, tableName))) - .where(airbyteRawId.in( - select(airbyteRawId) - .from(select(airbyteRawId, rowNumber) - .from(table(quotedName(schemaName, tableName))).asTable("airbyte_ids")) - .where(field(name(ROW_NUMBER_COLUMN_NAME)).ne(1)))) - .getSQL(ParamType.INLINED); - } - - private String deleteFromFinalTableCdcDeletes(final String schema, final String tableName) { - final DSLContext dsl = getDslContext(); - return dsl.deleteFrom(table(quotedName(schema, tableName))) - .where(field(quotedName(cdcDeletedAtColumn.getName())).isNotNull()) - .getSQL(ParamType.INLINED); - } - - private String checkpointRawTable(final String schemaName, final String tableName, final Optional minRawTimestamp) { - final DSLContext dsl = getDslContext(); - Condition extractedAtCondition = noCondition(); - if (minRawTimestamp.isPresent()) { - extractedAtCondition = extractedAtCondition.and(field(name(COLUMN_NAME_AB_EXTRACTED_AT)).gt(minRawTimestamp.get().toString())); - } - return dsl.update(table(quotedName(schemaName, tableName))) - .set(field(quotedName(COLUMN_NAME_AB_LOADED_AT)), currentTimestamp()) - .where(field(quotedName(COLUMN_NAME_AB_LOADED_AT)).isNull()).and(extractedAtCondition) - .getSQL(ParamType.INLINED); - } - - protected Field castedField( - final Field field, - final AirbyteType type, - final String alias, - final boolean useExpensiveSaferCasting) { - if (type instanceof final AirbyteProtocolType airbyteProtocolType) { - return castedField(field, airbyteProtocolType, useExpensiveSaferCasting).as(quotedName(alias)); - } - - // Redshift SUPER can silently cast an array type to struct and vice versa. - return switch (type.getTypeName()) { - case Struct.TYPE, UnsupportedOneOf.TYPE -> cast(field, getStructType()).as(quotedName(alias)); - case Array.TYPE -> cast(field, getArrayType()).as(quotedName(alias)); - // No nested Unions supported so this will definitely not result in infinite recursion. - case Union.TYPE -> castedField(field, ((Union) type).chooseType(), alias, useExpensiveSaferCasting); - default -> throw new IllegalArgumentException("Unsupported AirbyteType: " + type); - }; - } - - protected Field castedField(final Field field, final AirbyteProtocolType type, final boolean useExpensiveSaferCasting) { - return cast(field, toDialectType(type)); - } - - protected Field currentTimestamp() { - return DSL.currentTimestamp(); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.java deleted file mode 100644 index 641f7c129307..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping; - -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; -import io.airbyte.commons.exceptions.SQLRuntimeException; -import io.airbyte.integrations.base.destination.typing_deduping.BaseDestinationV1V2Migrator; -import io.airbyte.integrations.base.destination.typing_deduping.NamespacedTableName; -import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Collection; -import java.util.Optional; -import lombok.SneakyThrows; - -/** - * Largely based on - * {@link io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator}. - */ -public class JdbcV1V2Migrator extends BaseDestinationV1V2Migrator { - - private final NamingConventionTransformer namingConventionTransformer; - private final JdbcDatabase database; - private final String databaseName; - - public JdbcV1V2Migrator(final NamingConventionTransformer namingConventionTransformer, final JdbcDatabase database, final String databaseName) { - this.namingConventionTransformer = namingConventionTransformer; - this.database = database; - this.databaseName = databaseName; - } - - @SneakyThrows - @Override - public boolean doesAirbyteInternalNamespaceExist(final StreamConfig streamConfig) { - final String retrievedSchema = database.executeMetadataQuery(dbMetadata -> { - try (ResultSet columns = dbMetadata.getSchemas(databaseName, streamConfig.getId().getRawNamespace())) { - String schema = ""; - while (columns.next()) { - // Catalog can be null, so don't do anything with it. - // columns.getString("TABLE_CATALOG"); - schema = columns.getString("TABLE_SCHEM"); - } - return schema; - } catch (SQLException e) { - throw new SQLRuntimeException(e); - } - }); - - return !retrievedSchema.isEmpty(); - } - - @Override - public boolean schemaMatchesExpectation(final TableDefinition existingTable, final Collection columns) { - return existingTable.columns().keySet().containsAll(columns); - } - - @SneakyThrows - @Override - public Optional getTableIfExists(final String namespace, final String tableName) throws Exception { - return JdbcDestinationHandler.findExistingTable(database, databaseName, namespace, tableName); - } - - @Override - public NamespacedTableName convertToV1RawName(final StreamConfig streamConfig) { - @SuppressWarnings("deprecation") - final String tableName = this.namingConventionTransformer.getRawTableName(streamConfig.getId().getOriginalName()); - return new NamespacedTableName( - this.namingConventionTransformer.getIdentifier(streamConfig.getId().getOriginalNamespace()), - tableName); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java deleted file mode 100644 index fec2cde8ae69..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
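Editor's note: the schema lookup the deleted migrator performs through executeMetadataQuery reduces to a plain JDBC DatabaseMetaData call. A minimal, self-contained Kotlin sketch of that pattern follows; the connection URL, credentials, and schema name are illustrative placeholders, not values from this changeset.

    import java.sql.DriverManager

    // Sketch: returns true when the given schema is visible through JDBC metadata,
    // mirroring the doesAirbyteInternalNamespaceExist check above. TABLE_SCHEM is
    // the standard JDBC column name for the schema in the getSchemas() result set.
    fun schemaExists(jdbcUrl: String, user: String, password: String, schema: String): Boolean {
        DriverManager.getConnection(jdbcUrl, user, password).use { conn ->
            conn.metaData.getSchemas(null, schema).use { rs ->
                while (rs.next()) {
                    if (schema == rs.getString("TABLE_SCHEM")) return true
                }
            }
        }
        return false
    }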
- */ - -package io.airbyte.cdk.integrations.destination.staging; - -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.destination.StreamSyncSummary; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnCloseFunction; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnStartFunction; -import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig; -import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve; -import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.v0.StreamDescriptor; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.concurrent.locks.Lock; -import lombok.extern.slf4j.Slf4j; - -/** - * Functions and logic common to all flushing strategies. - */ -@Slf4j -public class GeneralStagingFunctions { - - // using a random string here as a placeholder for the moment. - // This would avoid mixing data in the staging area between different syncs (especially if they - // manipulate streams with similar names) - // if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to - // leverage data that was uploaded to stage - // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. - // This would also allow other programs/scripts - // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. - public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID(); - - public static OnStartFunction onStartFunction(final JdbcDatabase database, - final StagingOperations stagingOperations, - final List writeConfigs, - final TyperDeduper typerDeduper) { - return () -> { - log.info("Preparing raw tables in destination started for {} streams", writeConfigs.size()); - - typerDeduper.prepareSchemasAndRunMigrations(); - - // Create raw tables - final List queryList = new ArrayList<>(); - for (final WriteConfig writeConfig : writeConfigs) { - final String schema = writeConfig.getOutputSchemaName(); - final String stream = writeConfig.getStreamName(); - final String dstTableName = writeConfig.getOutputTableName(); - final String stageName = stagingOperations.getStageName(schema, dstTableName); - final String stagingPath = - stagingOperations.getStagingPath(SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schema, stream, writeConfig.getOutputTableName(), - writeConfig.getWriteDatetime()); - - log.info("Preparing staging area in destination started for schema {} stream {}: target table: {}, stage: {}", - schema, stream, dstTableName, stagingPath); - - stagingOperations.createSchemaIfNotExists(database, schema); - stagingOperations.createTableIfNotExists(database, schema, dstTableName); - stagingOperations.createStageIfNotExists(database, stageName); - - /* - * When we're in OVERWRITE, clear out the table at the start of a sync, this is an expected side - * effect of checkpoint and the removal of temporary tables - */ - switch (writeConfig.getSyncMode()) { - case OVERWRITE -> queryList.add(stagingOperations.truncateTableQuery(database, schema, dstTableName)); - case APPEND, APPEND_DEDUP -> {} - default -> throw new IllegalStateException("Unrecognized sync mode: " + writeConfig.getSyncMode()); - } - - log.info("Preparing staging area in destination completed for schema {} stream {}", schema, 
stream); - } - - typerDeduper.prepareFinalTables(); - - log.info("Executing finalization of tables."); - stagingOperations.executeTransaction(database, queryList); - }; - } - - /** - * Handles copying data from staging area to destination table and clean up of staged files if - * upload was unsuccessful - */ - public static void copyIntoTableFromStage(final JdbcDatabase database, - final String stageName, - final String stagingPath, - final List stagedFiles, - final String tableName, - final String schemaName, - final StagingOperations stagingOperations, - final String streamNamespace, - final String streamName, - final TypeAndDedupeOperationValve typerDeduperValve, - final TyperDeduper typerDeduper) - throws Exception { - try { - final Lock rawTableInsertLock = typerDeduper.getRawTableInsertLock(streamNamespace, streamName); - rawTableInsertLock.lock(); - try { - stagingOperations.copyIntoTableFromStage(database, stageName, stagingPath, stagedFiles, - tableName, schemaName); - } finally { - rawTableInsertLock.unlock(); - } - - final AirbyteStreamNameNamespacePair streamId = new AirbyteStreamNameNamespacePair(streamName, streamNamespace); - typerDeduperValve.addStreamIfAbsent(streamId); - if (typerDeduperValve.readyToTypeAndDedupe(streamId)) { - typerDeduper.typeAndDedupe(streamId.getNamespace(), streamId.getName(), false); - typerDeduperValve.updateTimeAndIncreaseInterval(streamId); - } - } catch (final Exception e) { - throw new RuntimeException("Failed to upload data from stage " + stagingPath, e); - } - } - - /** - * Tear down process, will attempt to try to clean out any staging area - * - * @param database database used for syncing - * @param stagingOperations collection of SQL queries necessary for writing data into a staging area - * @param writeConfigs configuration settings for all destination connectors needed to write - * @param purgeStagingData drop staging area if true, keep otherwise - * @return - */ - @SuppressWarnings("unchecked") - public static OnCloseFunction onCloseFunction(final JdbcDatabase database, - final StagingOperations stagingOperations, - final List writeConfigs, - final boolean purgeStagingData, - final TyperDeduper typerDeduper) { - return (hasFailed, streamSyncSummaries) -> { - // After moving data from staging area to the target table (airybte_raw) clean up the staging - // area (if user configured) - log.info("Cleaning up destination started for {} streams", writeConfigs.size()); - typerDeduper.typeAndDedupe((Map) streamSyncSummaries); - for (final WriteConfig writeConfig : writeConfigs) { - final String schemaName = writeConfig.getOutputSchemaName(); - if (purgeStagingData) { - final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName()); - final String stagePath = stagingOperations.getStagingPath( - RANDOM_CONNECTION_ID, - schemaName, - writeConfig.getStreamName(), - writeConfig.getOutputTableName(), - writeConfig.getWriteDatetime()); - log.info("Cleaning stage in destination started for stream {}. schema {}, stage: {}", writeConfig.getStreamName(), schemaName, - stagePath); - // TODO: This is another weird manifestation of Redshift vs Snowflake using either or variables from - // stageName/StagingPath. 
- stagingOperations.dropStageIfExists(database, stageName, stagePath); - } - } - typerDeduper.commitFinalTables(); - typerDeduper.cleanup(); - log.info("Cleaning up destination completed."); - }; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java deleted file mode 100644 index 767eea233364..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.destination.staging; - -import static java.util.stream.Collectors.joining; - -import com.google.common.annotations.VisibleForTesting; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig; -import io.airbyte.cdk.integrations.destination.record_buffer.FlushBufferFunction; -import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve; -import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; -import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.io.FileUtils; - -/** - * Serial flushing logic. Though simpler, this causes unnecessary backpressure and slows down the - * entire pipeline. - *

- * Note: This class should be re-written so that is implements the {@link FlushBufferFunction} - * interface, instead of return an anonymous function implementing this interface for clarity. As of - * this writing, we avoid doing so to simplify the migration to async flushing. - */ -@Slf4j -public class SerialFlush { - - /** - * Logic handling how destinations with staging areas (aka bucket storages) will flush their buffer - * - * @param database database used for syncing - * @param stagingOperations collection of SQL queries necessary for writing data into a staging area - * @param writeConfigs configuration settings for all destination connectors needed to write - * @param catalog collection of configured streams (e.g. API endpoints or database tables) - * @return - */ - @VisibleForTesting - public static FlushBufferFunction function( - final JdbcDatabase database, - final StagingOperations stagingOperations, - final List writeConfigs, - final ConfiguredAirbyteCatalog catalog, - final TypeAndDedupeOperationValve typerDeduperValve, - final TyperDeduper typerDeduper) { - // TODO: (ryankfu) move this block of code that executes before the lambda to #onStartFunction - final Set conflictingStreams = new HashSet<>(); - final Map pairToWriteConfig = new HashMap<>(); - for (final WriteConfig config : writeConfigs) { - final AirbyteStreamNameNamespacePair streamIdentifier = toNameNamespacePair(config); - if (pairToWriteConfig.containsKey(streamIdentifier)) { - conflictingStreams.add(config); - final WriteConfig existingConfig = pairToWriteConfig.get(streamIdentifier); - // The first conflicting stream won't have any problems, so we need to explicitly add it here. - conflictingStreams.add(existingConfig); - } else { - pairToWriteConfig.put(streamIdentifier, config); - } - } - if (!conflictingStreams.isEmpty()) { - final String message = String.format( - "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using ${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: %s", - conflictingStreams.stream().map(config -> config.getNamespace() + "." + config.getStreamName()).collect(joining(", "))); - throw new ConfigErrorException(message); - } - return (pair, writer) -> { - log.info("Flushing buffer for stream {} ({}) to staging", pair.getName(), FileUtils.byteCountToDisplaySize(writer.getByteCount())); - if (!pairToWriteConfig.containsKey(pair)) { - throw new IllegalArgumentException( - String.format("Message contained record from a stream that was not in the catalog. 
\ncatalog: %s", Jsons.serialize(catalog))); - } - - final WriteConfig writeConfig = pairToWriteConfig.get(pair); - final String schemaName = writeConfig.getOutputSchemaName(); - final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName()); - final String stagingPath = - stagingOperations.getStagingPath( - SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(), - writeConfig.getOutputTableName(), writeConfig.getWriteDatetime()); - try (writer) { - writer.flush(); - final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath); - GeneralStagingFunctions.copyIntoTableFromStage(database, stageName, stagingPath, List.of(stagedFile), writeConfig.getOutputTableName(), - schemaName, - stagingOperations, - writeConfig.getNamespace(), - writeConfig.getStreamName(), - typerDeduperValve, - typerDeduper); - } catch (final Exception e) { - log.error("Failed to flush and commit buffer data into destination's raw table", e); - throw new RuntimeException("Failed to upload buffer to stage and commit to destination", e); - } - }; - } - - private static AirbyteStreamNameNamespacePair toNameNamespacePair(final WriteConfig config) { - return new AirbyteStreamNameNamespacePair(config.getStreamName(), config.getNamespace()); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.java deleted file mode 100644 index a002f1555dfa..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.staging; - -import static java.util.stream.Collectors.toList; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.base.Preconditions; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; -import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig; -import io.airbyte.cdk.integrations.destination.record_buffer.BufferCreateFunction; -import io.airbyte.cdk.integrations.destination.record_buffer.SerializedBufferingStrategy; -import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve; -import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.AirbyteStream; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import java.time.Instant; -import java.util.List; -import java.util.UUID; -import java.util.function.Consumer; -import java.util.function.Function; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Uses both Factory and Consumer design pattern to create a single point of creation for consuming - * {@link AirbyteMessage} for processing - */ -public class SerialStagingConsumerFactory { - - private static final Logger LOGGER = LoggerFactory.getLogger(SerialStagingConsumerFactory.class); - - // using a random string here as a placeholder for the moment. - // This would avoid mixing data in the staging area between different syncs (especially if they - // manipulate streams with similar names) - // if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to - // leverage data that was uploaded to stage - // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. - // This would also allow other programs/scripts - // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. 
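Editor's note: the conflicting-streams guard in the SerialFlush code removed above amounts to grouping write configs by (namespace, stream name) and failing when any pair occurs more than once. A minimal sketch of that check, using a simplified stand-in for WriteConfig since the real class carries many more fields:

    // Simplified stand-in: only the fields the conflict check needs are modeled.
    data class StreamPair(val namespace: String?, val name: String)

    // Fail fast when two configured streams would land in the same raw table,
    // mirroring the ConfigErrorException thrown by SerialFlush.function above.
    fun assertNoConflictingStreams(pairs: List<StreamPair>) {
        val conflicts = pairs.groupBy { it }.filterValues { it.size > 1 }.keys
        require(conflicts.isEmpty()) {
            "You are trying to write multiple streams to the same table. Affected streams: " +
                conflicts.joinToString(", ") { "${it.namespace}.${it.name}" }
        }
    }

    fun main() {
        // Two distinct streams pass; duplicating either entry would throw.
        assertNoConflictingStreams(listOf(StreamPair("public", "users"), StreamPair("public", "orders")))
    }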
- private static final Instant SYNC_DATETIME = Instant.now(); - public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID(); - - public AirbyteMessageConsumer create(final Consumer outputRecordCollector, - final JdbcDatabase database, - final StagingOperations stagingOperations, - final NamingConventionTransformer namingResolver, - final BufferCreateFunction onCreateBuffer, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final boolean purgeStagingData, - final TypeAndDedupeOperationValve typerDeduperValve, - final TyperDeduper typerDeduper, - final ParsedCatalog parsedCatalog, - final String defaultNamespace, - final boolean useDestinationsV2Columns) { - final List writeConfigs = createWriteConfigs(namingResolver, config, catalog, parsedCatalog, useDestinationsV2Columns); - return new BufferedStreamConsumer( - outputRecordCollector, - GeneralStagingFunctions.onStartFunction(database, stagingOperations, writeConfigs, typerDeduper), - new SerializedBufferingStrategy( - onCreateBuffer, - catalog, - SerialFlush.function(database, stagingOperations, writeConfigs, catalog, typerDeduperValve, typerDeduper)), - GeneralStagingFunctions.onCloseFunction(database, stagingOperations, writeConfigs, purgeStagingData, typerDeduper), - catalog, - stagingOperations::isValidData, - defaultNamespace); - } - - /** - * Creates a list of all {@link WriteConfig} for each stream within a - * {@link ConfiguredAirbyteCatalog}. Each write config represents the configuration settings for - * writing to a destination connector - * - * @param namingResolver {@link NamingConventionTransformer} used to transform names that are - * acceptable by each destination connector - * @param config destination connector configuration parameters - * @param catalog {@link ConfiguredAirbyteCatalog} collection of configured - * {@link ConfiguredAirbyteStream} - * @return list of all write configs for each stream in a {@link ConfiguredAirbyteCatalog} - */ - private static List createWriteConfigs(final NamingConventionTransformer namingResolver, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog, - final ParsedCatalog parsedCatalog, - final boolean useDestinationsV2Columns) { - - return catalog.getStreams().stream().map(toWriteConfig(namingResolver, config, parsedCatalog, useDestinationsV2Columns)).collect(toList()); - } - - private static Function toWriteConfig(final NamingConventionTransformer namingResolver, - final JsonNode config, - final ParsedCatalog parsedCatalog, - final boolean useDestinationsV2Columns) { - return stream -> { - Preconditions.checkNotNull(stream.getDestinationSyncMode(), "Undefined destination sync mode"); - final AirbyteStream abStream = stream.getStream(); - final String streamName = abStream.getName(); - - final String outputSchema; - final String tableName; - if (useDestinationsV2Columns) { - final StreamId streamId = parsedCatalog.getStream(abStream.getNamespace(), streamName).getId(); - outputSchema = streamId.getRawNamespace(); - tableName = streamId.getRawName(); - } else { - outputSchema = getOutputSchema(abStream, config.get("schema").asText(), namingResolver); - tableName = namingResolver.getRawTableName(streamName); - } - final String tmpTableName = namingResolver.getTmpTableName(streamName); - final DestinationSyncMode syncMode = stream.getDestinationSyncMode(); - - final WriteConfig writeConfig = - new WriteConfig(streamName, abStream.getNamespace(), outputSchema, tmpTableName, tableName, syncMode, SYNC_DATETIME); - LOGGER.info("Write config: {}", 
-        writeConfig);
-
-      return writeConfig;
-    };
-  }
-
-  private static String getOutputSchema(final AirbyteStream stream,
-                                        final String defaultDestSchema,
-                                        final NamingConventionTransformer namingResolver) {
-    return stream.getNamespace() != null
-        ? namingResolver.getNamespace(stream.getNamespace())
-        : namingResolver.getNamespace(defaultDestSchema);
-  }
-
-}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt
new file mode 100644
index 000000000000..b9c21dec012a
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.google.common.annotations.VisibleForTesting
+import io.airbyte.cdk.db.factory.DataSourceFactory
+import io.airbyte.cdk.db.factory.DataSourceFactory.close
+import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.db.jdbc.JdbcUtils
+import io.airbyte.cdk.db.jdbc.JdbcUtils.parseJdbcParameters
+import io.airbyte.cdk.integrations.JdbcConnector
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer
+import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility.emitConfigErrorTrace
+import io.airbyte.cdk.integrations.base.Destination
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.getRawNamespaceOverride
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2
+import io.airbyte.cdk.integrations.base.errors.messages.ErrorMessage.getErrorMessage
+import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
+import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer
+import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteRecordMessage
+import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler
+import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator
+import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcV1V2Migrator
+import io.airbyte.cdk.integrations.util.addDefaultNamespaceToStreams
+import io.airbyte.commons.exceptions.ConnectionErrorException
+import io.airbyte.commons.json.Jsons
+import io.airbyte.commons.map.MoreMaps
+import io.airbyte.integrations.base.destination.typing_deduping.*
+import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration
+import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
+import io.airbyte.protocol.models.v0.AirbyteMessage
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
+import java.sql.Connection
+import java.sql.ResultSet
+import java.sql.SQLException
+import java.util.*
+import java.util.function.Consumer
+import javax.sql.DataSource
+import org.apache.commons.lang3.NotImplementedException
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+abstract class AbstractJdbcDestination<DestinationState : MinimumDestinationState>(
+    driverClass: String,
+    protected val namingResolver: NamingConventionTransformer,
+    protected val sqlOperations: SqlOperations
+) : JdbcConnector(driverClass), Destination {
+    protected val configSchemaKey: String
+        get() = "schema"
+
+    override fun check(config: JsonNode): AirbyteConnectionStatus? {
+        val dataSource = getDataSource(config)
+
+        try {
+            val database = getDatabase(dataSource)
+            val outputSchema = namingResolver.getIdentifier(config[JdbcUtils.SCHEMA_KEY].asText())
+            attemptTableOperations(outputSchema, database, namingResolver, sqlOperations, false)
+            if (isDestinationV2) {
+                val v2RawSchema =
+                    namingResolver.getIdentifier(
+                        getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE)
+                            .orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)
+                    )
+                attemptTableOperations(v2RawSchema, database, namingResolver, sqlOperations, false)
+                destinationSpecificTableOperations(database)
+            }
+            return AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED)
+        } catch (ex: ConnectionErrorException) {
+            val message = getErrorMessage(ex.stateCode, ex.errorCode, ex.exceptionMessage, ex)
+            emitConfigErrorTrace(ex, message)
+            return AirbyteConnectionStatus()
+                .withStatus(AirbyteConnectionStatus.Status.FAILED)
+                .withMessage(message)
+        } catch (e: Exception) {
+            LOGGER.error("Exception while checking connection: ", e)
+            return AirbyteConnectionStatus()
+                .withStatus(AirbyteConnectionStatus.Status.FAILED)
+                .withMessage(
+                    """
+                    Could not connect with provided configuration.
+                    ${e.message}
+                    """.trimIndent()
+                )
+        } finally {
+            try {
+                close(dataSource)
+            } catch (e: Exception) {
+                LOGGER.warn("Unable to close data source.", e)
+            }
+        }
+    }
+
+    /**
+     * Specific databases may have additional checks unique to them which they need to perform;
+     * override this method to add those checks.
+     *
+     * @param database the database to run checks against
+     * @throws Exception
+     */
+    @Throws(Exception::class)
+    protected fun destinationSpecificTableOperations(database: JdbcDatabase?) {}
+
+    /**
+     * Subclasses which need to modify the DataSource should override [.modifyDataSourceBuilder]
+     * rather than this method.
+     */
+    @VisibleForTesting
+    fun getDataSource(config: JsonNode): DataSource {
+        val jdbcConfig = toJdbcConfig(config)
+        val connectionProperties = getConnectionProperties(config)
+        val builder =
+            DataSourceFactory.DataSourceBuilder(
+                    jdbcConfig[JdbcUtils.USERNAME_KEY].asText(),
+                    if (jdbcConfig.has(JdbcUtils.PASSWORD_KEY))
+                        jdbcConfig[JdbcUtils.PASSWORD_KEY].asText()
+                    else null,
+                    driverClassName,
+                    jdbcConfig[JdbcUtils.JDBC_URL_KEY].asText()
+                )
+                .withConnectionProperties(connectionProperties)
+                .withConnectionTimeout(getConnectionTimeout(connectionProperties))
+        return modifyDataSourceBuilder(builder).build()
+    }
+
+    protected fun modifyDataSourceBuilder(
+        builder: DataSourceFactory.DataSourceBuilder
+    ): DataSourceFactory.DataSourceBuilder {
+        return builder
+    }
+
+    @VisibleForTesting
+    fun getDatabase(dataSource: DataSource): JdbcDatabase {
+        return DefaultJdbcDatabase(dataSource)
+    }
+
+    protected open fun getConnectionProperties(config: JsonNode): Map<String, String> {
+        val customProperties = parseJdbcParameters(config, JdbcUtils.JDBC_URL_PARAMS_KEY)
+        val defaultProperties = getDefaultConnectionProperties(config)
+        assertCustomParametersDontOverwriteDefaultParameters(customProperties, defaultProperties)
+        return MoreMaps.merge(customProperties, defaultProperties)
+    }
+
+    private fun assertCustomParametersDontOverwriteDefaultParameters(
+        customParameters: Map<String, String>,
+        defaultParameters: Map<String, String>
+    ) {
+        for (key in defaultParameters.keys) {
+            require(
+                !(customParameters.containsKey(key) &&
+                    customParameters[key] != defaultParameters[key])
+            ) { "Cannot overwrite default JDBC parameter $key" }
+        }
+    }
+
+    protected abstract fun getDefaultConnectionProperties(config: JsonNode): Map<String, String>
+
+    abstract fun toJdbcConfig(config: JsonNode): JsonNode
+
+    protected abstract val sqlGenerator: JdbcSqlGenerator
+
+    protected abstract fun getDestinationHandler(
+        databaseName: String,
+        database: JdbcDatabase,
+        rawTableSchema: String
+    ): JdbcDestinationHandler<DestinationState>
+
+    /**
+     * Provide any migrations that the destination needs to run. Most destinations will need to
+     * provide an instance of
+     * [io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcV1V2Migrator] at minimum.
+     */
+    protected abstract fun getMigrations(
+        database: JdbcDatabase,
+        databaseName: String,
+        sqlGenerator: SqlGenerator,
+        destinationHandler: DestinationHandler<DestinationState>
+    ): List<Migration<DestinationState>>
+
+    /**
+     * Reads the "database" key at the root of the config json; for any other layout of the
+     * config, override this method.
+     *
+     * @param config
+     * @return
+     */
+    protected fun getDatabaseName(config: JsonNode): String {
+        return config[JdbcUtils.DATABASE_KEY].asText()
+    }
+
+    protected fun getDataTransformer(
+        parsedCatalog: ParsedCatalog?,
+        defaultNamespace: String?
+    ): StreamAwareDataTransformer {
+        return IdentityDataTransformer()
+    }
+
+    override fun getConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        outputRecordCollector: Consumer<AirbyteMessage>
+    ): AirbyteMessageConsumer? {
+        throw NotImplementedException("Should use the getSerializedMessageConsumer instead")
+    }
+
+    @Throws(Exception::class)
+    override fun getSerializedMessageConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        outputRecordCollector: Consumer<AirbyteMessage>
+    ): SerializedAirbyteMessageConsumer? {
+        val database = getDatabase(getDataSource(config))
+        // Short circuit for non-v2 destinations.
+        if (!isDestinationV2) {
+            return JdbcBufferedConsumerFactory.createAsync(
+                outputRecordCollector,
+                database,
+                sqlOperations,
+                namingResolver,
+                config,
+                catalog,
+                null,
+                NoopTyperDeduper()
+            )
+        }
+
+        val defaultNamespace = config[configSchemaKey].asText()
+        addDefaultNamespaceToStreams(catalog!!, defaultNamespace)
+        return getV2MessageConsumer(
+            config,
+            catalog,
+            outputRecordCollector,
+            database,
+            defaultNamespace
+        )
+    }
+
+    private fun getV2MessageConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog?,
+        outputRecordCollector: Consumer<AirbyteMessage>,
+        database: JdbcDatabase,
+        defaultNamespace: String
+    ): SerializedAirbyteMessageConsumer? {
+        val sqlGenerator = sqlGenerator
+        val rawNamespaceOverride = getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE)
+        val parsedCatalog =
+            rawNamespaceOverride
+                .map { override: String -> CatalogParser(sqlGenerator, override) }
+                .orElse(CatalogParser(sqlGenerator))
+                .parseCatalog(catalog!!)
+        val databaseName = getDatabaseName(config)
+        val migrator = JdbcV1V2Migrator(namingResolver, database, databaseName)
+        val v2TableMigrator = NoopV2TableMigrator()
+        val destinationHandler: DestinationHandler<DestinationState> =
+            getDestinationHandler(
+                databaseName,
+                database,
+                rawNamespaceOverride.orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)
+            )
+        val disableTypeDedupe =
+            config.has(DISABLE_TYPE_DEDUPE) && config[DISABLE_TYPE_DEDUPE].asBoolean(false)
+        val typerDeduper: TyperDeduper
+        val migrations = getMigrations(database, databaseName, sqlGenerator, destinationHandler)
+        typerDeduper =
+            if (disableTypeDedupe) {
+                NoOpTyperDeduperWithV1V2Migrations(
+                    sqlGenerator,
+                    destinationHandler,
+                    parsedCatalog,
+                    migrator,
+                    v2TableMigrator,
+                    migrations
+                )
+            } else {
+                DefaultTyperDeduper(
+                    sqlGenerator,
+                    destinationHandler,
+                    parsedCatalog,
+                    migrator,
+                    v2TableMigrator,
+                    migrations
+                )
+            }
+
+        return JdbcBufferedConsumerFactory.createAsync(
+            outputRecordCollector,
+            database,
+            sqlOperations,
+            namingResolver,
+            config,
+            catalog,
+            defaultNamespace,
+            typerDeduper,
+            getDataTransformer(parsedCatalog, defaultNamespace)
+        )
+    }
+
+    companion object {
+        private val LOGGER: Logger = LoggerFactory.getLogger(AbstractJdbcDestination::class.java)
+
+        const val RAW_SCHEMA_OVERRIDE: String = "raw_data_schema"
+
+        const val DISABLE_TYPE_DEDUPE: String = "disable_type_dedupe"
+
+        /**
+         * This method is deprecated. It verifies table creation, but not the right to insert into
+         * a newly created table. Use attemptTableOperations with the attemptInsert argument
+         * instead.
+         */
+        @Deprecated("")
+        @Throws(Exception::class)
+        fun attemptSQLCreateAndDropTableOperations(
+            outputSchema: String?,
+            database: JdbcDatabase,
+            namingResolver: NamingConventionTransformer,
+            sqlOps: SqlOperations
+        ) {
+            attemptTableOperations(outputSchema, database, namingResolver, sqlOps, false)
+        }
+
+        /**
+         * Verifies that the provided creds have enough permissions. Steps are: 1. Create the
+         * schema if it does not exist. 2. Create a test table. 3. Insert a dummy record into the
+         * newly created table if "attemptInsert" is set to true. 4. Delete the table created in
+         * step 2.
+         *
+         * @param outputSchema
+         * - schema to test against.
+         * @param database
+         * - database to test against.
+         * @param namingResolver
+         * - naming resolver.
+         * @param sqlOps
+         * - SqlOperations object
+         * @param attemptInsert
+         * - set to true to attempt inserting a dummy record into the newly created table. Set to
+         * false to skip the insert step.
+         */
+        @Throws(Exception::class)
+        fun attemptTableOperations(
+            outputSchema: String?,
+            database: JdbcDatabase,
+            namingResolver: NamingConventionTransformer,
+            sqlOps: SqlOperations,
+            attemptInsert: Boolean
+        ) {
+            // verify we have write permissions on the target schema by creating a table with a
+            // random name,
+            // then dropping that table
+            try {
+                // Get metadata from the database to see whether connection is possible
+                database.bufferedResultSetQuery(
+                    { conn: Connection -> conn.metaData.catalogs },
+                    { queryContext: ResultSet? ->
+                        JdbcUtils.defaultSourceOperations.rowToJson(queryContext!!)
+                    }
+                )
+
+                // verify we have write permissions on the target schema by creating a table with a
+                // random name,
+                // then dropping that table
+                val outputTableName =
+                    namingResolver.getIdentifier(
+                        "_airbyte_connection_test_" +
+                            UUID.randomUUID().toString().replace("-".toRegex(), "")
+                    )
+                sqlOps.createSchemaIfNotExists(database, outputSchema)
+                sqlOps.createTableIfNotExists(database, outputSchema, outputTableName)
+                // verify if user has permission to make SQL INSERT queries
+                try {
+                    if (attemptInsert) {
+                        sqlOps.insertRecords(
+                            database,
+                            java.util.List.of(dummyRecord),
+                            outputSchema,
+                            outputTableName
+                        )
+                    }
+                } finally {
+                    sqlOps.dropTableIfExists(database, outputSchema, outputTableName)
+                }
+            } catch (e: SQLException) {
+                if (Objects.isNull(e.cause) || e.cause !is SQLException) {
+                    throw ConnectionErrorException(e.sqlState, e.errorCode, e.message, e)
+                } else {
+                    val cause = e.cause as SQLException?
+                    throw ConnectionErrorException(e.sqlState, cause!!.errorCode, cause.message, e)
+                }
+            } catch (e: Exception) {
+                throw Exception(e)
+            }
+        }
+
+        private val dummyRecord: PartialAirbyteMessage
+            /**
+             * Generates a dummy AirbyteRecordMessage with random values.
+             *
+             * @return AirbyteRecordMessage object with dummy values that may be used to test
+             * insert permission.
+             */
+            get() {
+                val dummyDataToInsert = Jsons.deserialize("{ \"field1\": true }")
+                return PartialAirbyteMessage()
+                    .withRecord(
+                        PartialAirbyteRecordMessage()
+                            .withStream("stream1")
+                            .withEmittedAt(1602637589000L)
+                    )
+                    .withSerialized(dummyDataToInsert.toString())
+            }
+    }
+}
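Editor's note: the permission probe above boils down to a create/insert/drop round-trip. A hedged sketch of the same sequence over a bare java.sql.Connection (identifiers are illustrative and unquoted for brevity; the real code goes through SqlOperations and the naming resolver):

    import java.sql.Connection
    import java.util.UUID

    // Probe whether the credentials can create, write to, and drop a table in the
    // target schema, mirroring attemptTableOperations above.
    fun probeTableOperations(conn: Connection, schema: String, attemptInsert: Boolean) {
        val table = "_airbyte_connection_test_" + UUID.randomUUID().toString().replace("-", "")
        conn.createStatement().use { stmt ->
            stmt.execute("CREATE SCHEMA IF NOT EXISTS $schema")
            stmt.execute("CREATE TABLE IF NOT EXISTS $schema.$table (id VARCHAR(36), data VARCHAR(1000))")
            try {
                if (attemptInsert) {
                    stmt.execute("INSERT INTO $schema.$table VALUES ('test-id', '{ \"field1\": true }')")
                }
            } finally {
                // Drop the probe table even when the insert fails.
                stmt.execute("DROP TABLE IF EXISTS $schema.$table")
            }
        }
    }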
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.kt
new file mode 100644
index 000000000000..43b771dc9631
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/ColumnDefinition.kt
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+/**
+ * Jdbc destination column definition representation
+ *
+ * @param name
+ * @param type
+ * @param columnSize
+ * @param isNullable
+ */
+class ColumnDefinition(name: String, type: String, columnSize: Int, isNullable: Boolean) {
+    val name: String
+    val type: String
+    val columnSize: Int
+    val isNullable: Boolean
+
+    init {
+        this.name = name
+        this.type = type
+        this.columnSize = columnSize
+        this.isNullable = isNullable
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapter.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapter.kt
new file mode 100644
index 000000000000..ff82ad3b0157
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapter.kt
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.ObjectNode
+import java.util.function.Function
+import java.util.function.Predicate
+
+class DataAdapter
+/**
+ * A data adapter allows applying destination data rules. For example, the Postgres destination
+ * can't process text values containing the \u0000 unicode character. You can describe a filter
+ * condition for a value node and a function which adapts the filtered value nodes.
+ *
+ * @param filterValueNode
+ * - filter condition which decides which value nodes should be adapted
+ * @param valueNodeAdapter
+ * - transformation function which returns the adapted value node
+ */
+(
+    private val filterValueNode: Predicate<JsonNode>,
+    private val valueNodeAdapter: Function<JsonNode, JsonNode>
+) {
+    fun adapt(messageData: JsonNode?) {
+        if (messageData != null) {
+            adaptAllValueNodes(messageData)
+        }
+    }
+
+    private fun adaptAllValueNodes(rootNode: JsonNode) {
+        adaptValueNodes(null, rootNode, null)
+    }
+
+    /**
+     * The method inspects a json node. In case it's a value node, we check the node with the
+     * filter predicate and apply the value node adapter. Filtered nodes are replaced by their
+     * adapted versions. If the element is an array or an object, we run the method recursively
+     * over its children.
+     *
+     * @param fieldName Name of a json node
+     * @param node Json node
+     * @param parentNode Parent json node
+     */
+    private fun adaptValueNodes(fieldName: String?, node: JsonNode, parentNode: JsonNode?) {
+        if (node.isValueNode && filterValueNode.test(node)) {
+            if (fieldName != null) {
+                val adaptedNode = valueNodeAdapter.apply(node)
+                (parentNode as ObjectNode?)!!.set<JsonNode>(fieldName, adaptedNode)
+            } else
+                throw RuntimeException("Unexpected value node without fieldName. Node: $node")
+        } else if (node.isArray) {
+            node.elements().forEachRemaining { arrayNode: JsonNode ->
+                adaptValueNodes(null, arrayNode, node)
+            }
+        } else {
+            node.fields().forEachRemaining { stringJsonNodeEntry: Map.Entry<String, JsonNode> ->
+                adaptValueNodes(stringJsonNodeEntry.key, stringJsonNodeEntry.value, node)
+            }
+        }
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt
new file mode 100644
index 000000000000..a13c2f3dba43
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.google.common.base.Preconditions
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.db.jdbc.JdbcUtils
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.getRawNamespaceOverride
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2
+import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
+import io.airbyte.cdk.integrations.destination.StreamSyncSummary
+import io.airbyte.cdk.integrations.destination.async.AsyncStreamConsumer
+import io.airbyte.cdk.integrations.destination.async.buffers.BufferManager
+import io.airbyte.cdk.integrations.destination.async.deser.DeserializationUtil
+import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer
+import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import io.airbyte.cdk.integrations.destination.async.state.FlushFailure
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnCloseFunction
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnStartFunction
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.RecordWriter
+import io.airbyte.commons.json.Jsons
+import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName
+import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper
+import io.airbyte.protocol.models.v0.*
+import java.util.*
+import java.util.concurrent.Executors
+import java.util.function.Consumer
+import java.util.function.Function
+import java.util.stream.Collectors
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+/**
+ * Strategy:
+ *
+ * 1. Create a final table for each stream
+ *
+ * 2. Accumulate records in a buffer. One buffer per stream
+ *
+ * 3. As records accumulate, write them in batches to the database. We set a minimum number of
+ * records before writing to avoid wasteful record-wise writes. In the case of slow syncs this
+ * will be superseded by a periodic record flush from [BufferedStreamConsumer.periodicBufferFlush]
+ *
+ * 4. Once all records have been written to the buffer, flush the buffer and write any remaining
+ * records to the database (regardless of how few are left)
+ */
+object JdbcBufferedConsumerFactory {
+    private val LOGGER: Logger = LoggerFactory.getLogger(JdbcBufferedConsumerFactory::class.java)
+
+    @JvmOverloads
+    fun createAsync(
+        outputRecordCollector: Consumer<AirbyteMessage>,
+        database: JdbcDatabase,
+        sqlOperations: SqlOperations,
+        namingResolver: NamingConventionTransformer,
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        defaultNamespace: String?,
+        typerDeduper: TyperDeduper,
+        dataTransformer: StreamAwareDataTransformer = IdentityDataTransformer()
+    ): SerializedAirbyteMessageConsumer {
+        val writeConfigs =
+            createWriteConfigs(namingResolver, config, catalog, sqlOperations.isSchemaRequired)
+        return AsyncStreamConsumer(
+            outputRecordCollector,
+            onStartFunction(database, sqlOperations, writeConfigs, typerDeduper),
+            onCloseFunction(typerDeduper),
+            JdbcInsertFlushFunction(
+                recordWriterFunction(database, sqlOperations, writeConfigs, catalog)
+            ),
+            catalog,
+            BufferManager((Runtime.getRuntime().maxMemory() * 0.2).toLong()),
+            FlushFailure(),
+            Optional.ofNullable(defaultNamespace),
+            Executors.newFixedThreadPool(2),
+            dataTransformer,
+            DeserializationUtil()
+        )
+    }
+
+    private fun createWriteConfigs(
+        namingResolver: NamingConventionTransformer,
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog?,
+        schemaRequired: Boolean
+    ): List<WriteConfig> {
+        if (schemaRequired) {
+            Preconditions.checkState(
+                config.has("schema"),
+                "jdbc destinations must specify a schema."
+            )
+        }
+        return catalog!!
+            .streams
+            .stream()
+            .map(toWriteConfig(namingResolver, config, schemaRequired))
+            .collect(Collectors.toList())
+    }
+
+    private fun toWriteConfig(
+        namingResolver: NamingConventionTransformer,
+        config: JsonNode,
+        schemaRequired: Boolean
+    ): Function<ConfiguredAirbyteStream, WriteConfig> {
+        return Function { stream: ConfiguredAirbyteStream ->
+            Preconditions.checkNotNull(
+                stream.destinationSyncMode,
+                "Undefined destination sync mode"
+            )
+            val abStream = stream.stream
+
+            val defaultSchemaName =
+                if (schemaRequired) namingResolver.getIdentifier(config["schema"].asText())
+                else namingResolver.getIdentifier(config[JdbcUtils.DATABASE_KEY].asText())
+            // Method checks for v2
+            val outputSchema = getOutputSchema(abStream, defaultSchemaName, namingResolver)
+            val streamName = abStream.name
+            val tableName: String
+            val tmpTableName: String
+            // TODO: Should this be injected from outside ?
+            if (isDestinationV2) {
+                val finalSchema = Optional.ofNullable(abStream.namespace).orElse(defaultSchemaName)
+                val rawName = concatenateRawTableName(finalSchema, streamName)
+                tableName = namingResolver.convertStreamName(rawName)
+                tmpTableName = namingResolver.getTmpTableName(rawName)
+            } else {
+                tableName = namingResolver.getRawTableName(streamName)
+                tmpTableName = namingResolver.getTmpTableName(streamName)
+            }
+            val syncMode = stream.destinationSyncMode
+
+            val writeConfig =
+                WriteConfig(
+                    streamName,
+                    abStream.namespace,
+                    outputSchema,
+                    tmpTableName,
+                    tableName,
+                    syncMode
+                )
+            LOGGER.info("Write config: {}", writeConfig)
+            writeConfig
+        }
+    }
+
+    /**
+     * Defer to the [AirbyteStream]'s namespace. If this is not set, use the destination's default
+     * schema. This namespace is source-provided, and can be potentially empty.
+     *
+     * The logic here matches the logic in the catalog_process.py for Normalization. Any
+     * modifications need to be reflected there and vice versa.
+     */
+    private fun getOutputSchema(
+        stream: AirbyteStream,
+        defaultDestSchema: String,
+        namingResolver: NamingConventionTransformer
+    ): String {
+        return if (isDestinationV2) {
+            namingResolver.getNamespace(
+                getRawNamespaceOverride(AbstractJdbcDestination.Companion.RAW_SCHEMA_OVERRIDE)
+                    .orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)
+            )
+        } else {
+            namingResolver.getNamespace(
+                Optional.ofNullable(stream.namespace).orElse(defaultDestSchema)
+            )
+        }
+    }
+
+    /**
+     * Sets up destination storage through:
+     *
+     * 1. Creates the schema (if not exists)
+     *
+     * 2. Creates the airbyte_raw table (if not exists)
+     *
+     * 3. Truncates the table if the sync mode is OVERWRITE
+     *
+     * @param database JDBC database to connect to
+     * @param sqlOperations interface for executing SQL queries
+     * @param writeConfigs settings for each stream
+     */
+    private fun onStartFunction(
+        database: JdbcDatabase,
+        sqlOperations: SqlOperations,
+        writeConfigs: Collection<WriteConfig>,
+        typerDeduper: TyperDeduper
+    ): OnStartFunction {
+        return OnStartFunction {
+            typerDeduper.prepareSchemasAndRunMigrations()
+            LOGGER.info(
+                "Preparing raw tables in destination started for {} streams",
+                writeConfigs.size
+            )
+            val queryList: MutableList<String> = ArrayList()
+            for (writeConfig in writeConfigs) {
+                val schemaName = writeConfig.outputSchemaName
+                val dstTableName = writeConfig.outputTableName
+                LOGGER.info(
+                    "Preparing raw table in destination started for stream {}. schema: {}, table name: {}",
+                    writeConfig.streamName,
+                    schemaName,
+                    dstTableName
+                )
+                sqlOperations.createSchemaIfNotExists(database, schemaName)
+                sqlOperations.createTableIfNotExists(database, schemaName, dstTableName)
+                when (writeConfig.syncMode) {
+                    DestinationSyncMode.OVERWRITE ->
+                        queryList.add(
+                            sqlOperations.truncateTableQuery(database, schemaName, dstTableName)
+                        )
+                    DestinationSyncMode.APPEND,
+                    DestinationSyncMode.APPEND_DEDUP -> {}
+                    else ->
+                        throw IllegalStateException(
+                            "Unrecognized sync mode: " + writeConfig.syncMode
+                        )
+                }
+            }
+            sqlOperations.executeTransaction(database, queryList)
+            LOGGER.info("Preparing raw tables in destination completed.")
+            typerDeduper.prepareFinalTables()
+        }
+    }
+
+    /**
+     * Writes [AirbyteRecordMessage] to the JDBC database's airbyte_raw table
+     *
+     * @param database JDBC database to connect to
+     * @param sqlOperations interface of SQL queries to execute
+     * @param writeConfigs settings for each stream
+     * @param catalog catalog of all streams to sync
+     */
+    private fun recordWriterFunction(
+        database: JdbcDatabase,
+        sqlOperations: SqlOperations,
+        writeConfigs: List<WriteConfig>,
+        catalog: ConfiguredAirbyteCatalog?
+    ): RecordWriter<PartialAirbyteMessage> {
+        val pairToWriteConfig: Map<AirbyteStreamNameNamespacePair, WriteConfig> =
+            writeConfigs.associateBy { toNameNamespacePair(it) }
+
+        return RecordWriter {
+            pair: AirbyteStreamNameNamespacePair,
+            records: List<PartialAirbyteMessage> ->
+            require(pairToWriteConfig.containsKey(pair)) {
+                String.format(
\ncatalog: %s",
+                    Jsons.serialize(catalog)
+                )
+            }
+            val writeConfig = pairToWriteConfig.getValue(pair)
+            sqlOperations.insertRecords(
+                database,
+                ArrayList(records),
+                writeConfig.outputSchemaName,
+                writeConfig.outputTableName
+            )
+        }
+    }
+
+    /** Tear down functionality */
+    private fun onCloseFunction(typerDeduper: TyperDeduper): OnCloseFunction {
+        return OnCloseFunction {
+            hasFailed: Boolean,
+            streamSyncSummaries: Map ->
+            try {
+                typerDeduper.typeAndDedupe(streamSyncSummaries)
+                typerDeduper.commitFinalTables()
+                typerDeduper.cleanup()
+            } catch (e: Exception) {
+                throw RuntimeException(e)
+            }
+        }
+    }
+
+    private fun toNameNamespacePair(config: WriteConfig): AirbyteStreamNameNamespacePair {
+        return AirbyteStreamNameNamespacePair(config.streamName, config.namespace)
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt
new file mode 100644
index 000000000000..613856d191cb
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import io.airbyte.cdk.integrations.destination.async.function.DestinationFlushFunction
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.RecordWriter
+import io.airbyte.cdk.integrations.destination.jdbc.constants.GlobalDataSizeConstants
+import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
+import io.airbyte.protocol.models.v0.StreamDescriptor
+import java.util.stream.Stream
+
+class JdbcInsertFlushFunction(private val recordWriter: RecordWriter) :
+    DestinationFlushFunction {
+    @Throws(Exception::class)
+    override fun flush(desc: StreamDescriptor, stream: Stream) {
+        recordWriter.accept(
+            AirbyteStreamNameNamespacePair(desc.name, desc.namespace),
+            stream.toList()
+        )
+    }
+
+    override val optimalBatchSizeBytes: Long
+        get() = // TODO tune this value - currently SqlOperationsUtils partitions 10K records
+            // per insert statement, but we'd like to stop doing that and instead control the
+            // SQL insert statement size via batch size.
+            GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES.toLong()
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt
new file mode 100644
index 000000000000..e1191f9b45a9
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import com.fasterxml.jackson.databind.JsonNode
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import io.airbyte.commons.exceptions.ConfigErrorException
+import io.airbyte.commons.json.Jsons
+import java.io.File
+import java.io.PrintWriter
+import java.nio.charset.StandardCharsets
+import java.sql.SQLException
+import java.sql.Timestamp
+import java.time.Instant
+import java.util.*
+import java.util.function.Consumer
+import org.apache.commons.csv.CSVFormat
+import org.apache.commons.csv.CSVPrinter
+
+abstract class JdbcSqlOperations : SqlOperations {
+    // this adapter modifies record messages before inserting them into the destination
+    protected val dataAdapter: Optional
+    protected val schemaSet: MutableSet = HashSet()
+
+    protected constructor() {
+        this.dataAdapter = Optional.empty()
+    }
+
+    protected constructor(dataAdapter: DataAdapter) {
+        this.dataAdapter = Optional.of(dataAdapter)
+    }
+
+    @Throws(Exception::class)
+    override fun createSchemaIfNotExists(database: JdbcDatabase?, schemaName: String?) {
+        try {
+            if (!schemaSet.contains(schemaName) && !isSchemaExists(database, schemaName)) {
+                database!!.execute(String.format("CREATE SCHEMA IF NOT EXISTS %s;", schemaName))
+                schemaSet.add(schemaName)
+            }
+        } catch (e: Exception) {
+            throw checkForKnownConfigExceptions(e).orElseThrow { e }
+        }
+    }
+
+    /**
+     * When an exception occurs, we may recognize it as an issue with the user's permissions or
+     * other configuration options. In these cases, we can wrap the exception in a
+     * [ConfigErrorException] which will exclude the error from our on-call paging/reporting
+     *
+     * @param e the exception to check.
+     * @return A ConfigErrorException with a message containing actionable feedback to the user.
+     */
+    protected fun checkForKnownConfigExceptions(e: Exception?): Optional {
+        return Optional.empty()
+    }
+
+    @Throws(SQLException::class)
+    override fun createTableIfNotExists(
+        database: JdbcDatabase,
+        schemaName: String?,
+        tableName: String?
+    ) {
+        try {
+            database.execute(createTableQuery(database, schemaName, tableName))
+            for (postCreateSql in postCreateTableQueries(schemaName, tableName)) {
+                database.execute(postCreateSql)
+            }
+        } catch (e: SQLException) {
+            throw checkForKnownConfigExceptions(e).orElseThrow { e }
+        }
+    }
+
+    override fun createTableQuery(
+        database: JdbcDatabase?,
+        schemaName: String?,
+        tableName: String?
+    ): String? {
+        return if (isDestinationV2) {
+            createTableQueryV2(schemaName, tableName)
+        } else {
+            createTableQueryV1(schemaName, tableName)
+        }
+    }
+
+    /**
+     * Some subclasses may want to execute additional SQL statements after creating the raw table.
+     * For example, Postgres does not support index definitions within a CREATE TABLE statement, so
+     * we need to run CREATE INDEX statements after creating the table.
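+     *
+     * A rough sketch of a subclass override (illustrative only; it assumes the base declaration
+     * is open to subclasses, and the index name is made up):
+     * ```
+     * override fun postCreateTableQueries(schemaName: String?, tableName: String?): List<String> =
+     *     listOf("CREATE INDEX IF NOT EXISTS idx_extracted_at ON $schemaName.$tableName (_airbyte_extracted_at);")
+     * ```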
+ */
+    protected fun postCreateTableQueries(schemaName: String?, tableName: String?): List {
+        return listOf()
+    }
+
+    protected fun createTableQueryV1(schemaName: String?, tableName: String?): String {
+        return String.format(
+            """
+        CREATE TABLE IF NOT EXISTS %s.%s (
+          %s VARCHAR PRIMARY KEY,
+          %s JSONB,
+          %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+        );
+
+        """.trimIndent(),
+            schemaName,
+            tableName,
+            JavaBaseConstants.COLUMN_NAME_AB_ID,
+            JavaBaseConstants.COLUMN_NAME_DATA,
+            JavaBaseConstants.COLUMN_NAME_EMITTED_AT
+        )
+    }
+
+    protected fun createTableQueryV2(schemaName: String?, tableName: String?): String {
+        // Note that Meta is the last column in order. There was a time when tables didn't have
+        // meta; we issued an ALTER to add that column, so it should remain the last column.
+        return String.format(
+            """
+        CREATE TABLE IF NOT EXISTS %s.%s (
+          %s VARCHAR PRIMARY KEY,
+          %s JSONB,
+          %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+          %s TIMESTAMP WITH TIME ZONE DEFAULT NULL,
+          %s JSONB
+        );
+
+        """.trimIndent(),
+            schemaName,
+            tableName,
+            JavaBaseConstants.COLUMN_NAME_AB_RAW_ID,
+            JavaBaseConstants.COLUMN_NAME_DATA,
+            JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT,
+            JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT,
+            JavaBaseConstants.COLUMN_NAME_AB_META
+        )
+    }
+
+    // TODO: This method seems to be used by Postgres and others while staging to local temp files.
+    // Should there be a local staging operations equivalent?
+    @Throws(Exception::class)
+    protected fun writeBatchToFile(tmpFile: File?, records: List) {
+        PrintWriter(tmpFile, StandardCharsets.UTF_8).use { writer ->
+            CSVPrinter(writer, CSVFormat.DEFAULT).use { csvPrinter ->
+                for (record in records) {
+                    val uuid = UUID.randomUUID().toString()
+                    // TODO we only need to do this if formatData is overridden. If not, we can
+                    // just do jsonData = record.getSerialized()
+                    val jsonData =
+                        Jsons.serialize(formatData(Jsons.deserializeExact(record.serialized)))
+                    val airbyteMeta = Jsons.serialize(record.record!!.meta)
+                    val extractedAt =
+                        Timestamp.from(Instant.ofEpochMilli(record.record!!.emittedAt))
+                    if (isDestinationV2) {
+                        csvPrinter.printRecord(uuid, jsonData, extractedAt, null, airbyteMeta)
+                    } else {
+                        csvPrinter.printRecord(uuid, jsonData, extractedAt)
+                    }
+                }
+            }
+        }
+    }
+
+    protected fun formatData(data: JsonNode): JsonNode {
+        return data
+    }
+
+    override fun truncateTableQuery(
+        database: JdbcDatabase?,
+        schemaName: String?,
+        tableName: String?
+    ): String {
+        return String.format("TRUNCATE TABLE %s.%s;\n", schemaName, tableName)
+    }
+
+    override fun insertTableQuery(
+        database: JdbcDatabase?,
+        schemaName: String?,
+        srcTableName: String?,
+        dstTableName: String?
+    ): String? {
+        return String.format(
+            "INSERT INTO %s.%s SELECT * FROM %s.%s;\n",
+            schemaName,
+            dstTableName,
+            schemaName,
+            srcTableName
+        )
+    }
+
+    @Throws(Exception::class)
+    override fun executeTransaction(database: JdbcDatabase, queries: List) {
+        val appendedQueries = StringBuilder()
+        appendedQueries.append("BEGIN;\n")
+        for (query in queries) {
+            appendedQueries.append(query)
+        }
+        appendedQueries.append("COMMIT;")
+        database.execute(appendedQueries.toString())
+    }
+
+    @Throws(SQLException::class)
+    override fun dropTableIfExists(
+        database: JdbcDatabase,
+        schemaName: String?,
+        tableName: String?
+    ) {
+        try {
+            database.execute(dropTableIfExistsQuery(schemaName, tableName))
+        } catch (e: SQLException) {
+            throw checkForKnownConfigExceptions(e).orElseThrow { e }
+        }
+    }
+
+    fun dropTableIfExistsQuery(schemaName: String?, tableName: String?): String {
+        return String.format("DROP TABLE IF EXISTS %s.%s;\n", schemaName, tableName)
+    }
+
+    override val isSchemaRequired: Boolean
+        get() = true
+
+    override fun isValidData(data: JsonNode?): Boolean {
+        return true
+    }
+
+    @Throws(Exception::class)
+    override fun insertRecords(
+        database: JdbcDatabase,
+        records: List,
+        schemaName: String?,
+        tableName: String?
+    ) {
+        dataAdapter.ifPresent { adapter: DataAdapter ->
+            records!!.forEach(
+                Consumer { airbyteRecordMessage: PartialAirbyteMessage? ->
+                    val data = Jsons.deserializeExact(airbyteRecordMessage!!.serialized)
+                    adapter.adapt(data)
+                    airbyteRecordMessage.serialized = Jsons.serialize(data)
+                }
+            )
+        }
+        if (isDestinationV2) {
+            insertRecordsInternalV2(database, records, schemaName, tableName)
+        } else {
+            insertRecordsInternal(database, records, schemaName, tableName)
+        }
+    }
+
+    @Throws(Exception::class)
+    protected abstract fun insertRecordsInternal(
+        database: JdbcDatabase,
+        records: List,
+        schemaName: String?,
+        tableName: String?
+    )
+
+    @Throws(Exception::class)
+    protected abstract fun insertRecordsInternalV2(
+        database: JdbcDatabase,
+        records: List,
+        schemaName: String?,
+        tableName: String?
+    )
+
+    companion object {
+        protected const val SHOW_SCHEMAS: String = "show schemas;"
+        protected const val NAME: String = "name"
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt
new file mode 100644
index 000000000000..32a779207b17
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc
+
+import com.google.common.annotations.VisibleForTesting
+import com.google.common.collect.Iterables
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2
+import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import io.airbyte.commons.functional.CheckedConsumer
+import java.sql.Connection
+import java.sql.SQLException
+import java.sql.Timestamp
+import java.time.Instant
+import java.util.*
+import java.util.function.Consumer
+import java.util.function.Supplier
+
+object SqlOperationsUtils {
+    /**
+     * Inserts "raw" records in a single query. The purpose of this helper is to abstract away
+     * database-specific SQL syntax from this query.
+     *
+     * @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id,
+     * data, emitted_at)
+     * @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb, ?)
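+     * (for each record the placeholders are bound, in order, to the raw id, the serialized record
+     * data, and the emitted_at timestamp - plus a null loaded_at in Destinations V2)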
+     * @param jdbcDatabase jdbc database
+     * @param records records to write
+     * @throws SQLException exception
+     */
+    @Throws(SQLException::class)
+    fun insertRawRecordsInSingleQuery(
+        insertQueryComponent: String?,
+        recordQueryComponent: String?,
+        jdbcDatabase: JdbcDatabase,
+        records: List
+    ) {
+        insertRawRecordsInSingleQuery(
+            insertQueryComponent,
+            recordQueryComponent,
+            jdbcDatabase,
+            records,
+            { UUID.randomUUID() },
+            true
+        )
+    }
+
+    /**
+     * Inserts "raw" records in a single query. The purpose of this helper is to abstract away
+     * database-specific SQL syntax from this query.
+     *
+     * This version does not add a semicolon at the end of the INSERT statement.
+     *
+     * @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id,
+     * data, emitted_at)
+     * @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb, ?)
+     * @param jdbcDatabase jdbc database
+     * @param records records to write
+     * @throws SQLException exception
+     */
+    @Throws(SQLException::class)
+    fun insertRawRecordsInSingleQueryNoSem(
+        insertQueryComponent: String?,
+        recordQueryComponent: String?,
+        jdbcDatabase: JdbcDatabase,
+        records: List
+    ) {
+        insertRawRecordsInSingleQuery(
+            insertQueryComponent,
+            recordQueryComponent,
+            jdbcDatabase,
+            records,
+            { UUID.randomUUID() },
+            false
+        )
+    }
+
+    @VisibleForTesting
+    @Throws(SQLException::class)
+    fun insertRawRecordsInSingleQuery(
+        insertQueryComponent: String?,
+        recordQueryComponent: String?,
+        jdbcDatabase: JdbcDatabase,
+        records: List,
+        uuidSupplier: Supplier,
+        sem: Boolean
+    ) {
+        if (records.isEmpty()) {
+            return
+        }
+
+        jdbcDatabase.execute(
+            CheckedConsumer { connection: Connection ->
+
+                // Strategy: We want to use PreparedStatement because it handles binding values to
+                // the SQL query (e.g. handling formatting timestamps). A PreparedStatement is
+                // created by supplying the full SQL string at creation time, and the values are
+                // subsequently bound to that string. Thus there will be two loops below.
+                // 1) Loop over records to build the full string.
+                // 2) Loop over the records and bind the appropriate values to the string.
+                // We also partition the query to run on 10k records at a time, since some DBs set
+                // a max limit on how many records can be inserted at once
+                // TODO(sherif) this should use a smarter, destination-aware partitioning scheme
+                // instead of 10k by default
+                for (partition in Iterables.partition(records, 10000)) {
+                    val sql = StringBuilder(insertQueryComponent)
+                    partition.forEach(
+                        Consumer { r: PartialAirbyteMessage? -> sql.append(recordQueryComponent) }
+                    )
+                    val s = sql.toString()
+                    val s1 = s.substring(0, s.length - 2) + (if (sem) ";" else "")
+
+                    connection.prepareStatement(s1).use { statement ->
+                        // second loop: bind values to the SQL string.
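+                        // All records in this partition share the single prepared statement;
+                        // the index i advances through each record's placeholder group in turn.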
+ // 1-indexed + var i = 1 + for (message in partition) { + // Airbyte Raw ID + statement.setString(i, uuidSupplier.get().toString()) + i++ + + // Message Data + statement.setString(i, message.serialized) + i++ + + // Extracted At + statement.setTimestamp( + i, + Timestamp.from(Instant.ofEpochMilli(message.record!!.emittedAt)) + ) + i++ + + if (isDestinationV2) { + // Loaded At + statement.setTimestamp(i, null) + i++ + } + } + statement.execute() + } + } + } + ) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt similarity index 53% rename from airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.java rename to airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt index c8fc4f2e7ca8..72309e3628aa 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt @@ -1,16 +1,11 @@ /* * Copyright (c) 2023 Airbyte, Inc., all rights reserved. */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import java.util.LinkedHashMap; +package io.airbyte.cdk.integrations.destination.jdbc /** * Jdbc destination table definition representation with a map of column names to column definitions * * @param columns */ -public record TableDefinition(LinkedHashMap columns) { - -} +@JvmRecord data class TableDefinition(val columns: LinkedHashMap) diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt new file mode 100644 index 000000000000..fc3ef9ea4796 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc + +import io.airbyte.protocol.models.v0.DestinationSyncMode +import java.time.Instant + +/** + * Write configuration POJO (plain old java object) for all destinations extending + * [AbstractJdbcDestination]. 
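+ *
+ * A rough construction sketch (all values are illustrative):
+ * ```
+ * val config = WriteConfig("users", "public", "public", "_airbyte_tmp_users", "_airbyte_raw_users", DestinationSyncMode.APPEND)
+ * ```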
+ */
+class WriteConfig
+@JvmOverloads
+constructor(
+    val streamName: String,
+    /**
+     * This is used in [JdbcBufferedConsumerFactory] to verify that a record is from the expected
+     * streams.
+     */
+    val namespace: String,
+    val outputSchemaName: String,
+    val tmpTableName: String?,
+    val outputTableName: String?,
+    val syncMode: DestinationSyncMode,
+    val writeDatetime: Instant = Instant.now()
+) {
+    override fun toString(): String {
+        return "WriteConfig{" +
+            "streamName=" +
+            streamName +
+            ", namespace=" +
+            namespace +
+            ", outputSchemaName=" +
+            outputSchemaName +
+            ", tmpTableName=" +
+            tmpTableName +
+            ", outputTableName=" +
+            outputTableName +
+            ", syncMode=" +
+            syncMode +
+            '}'
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt
new file mode 100644
index 000000000000..67470d9df119
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.destination.jdbc.copy
+
+import com.fasterxml.jackson.databind.JsonNode
+import io.airbyte.cdk.db.factory.DataSourceFactory.close
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer
+import io.airbyte.cdk.integrations.destination.StandardNameTransformer
+import io.airbyte.cdk.integrations.destination.StreamSyncSummary
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.*
+import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations
+import io.airbyte.cdk.integrations.destination.jdbc.constants.GlobalDataSizeConstants
+import io.airbyte.cdk.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy
+import io.airbyte.protocol.models.v0.*
+import java.util.*
+import java.util.function.Consumer
+import javax.sql.DataSource
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+object CopyConsumerFactory {
+    private val LOGGER: Logger = LoggerFactory.getLogger(CopyConsumerFactory::class.java)
+
+    fun create(
+        outputRecordCollector: Consumer,
+        dataSource: DataSource,
+        database: JdbcDatabase,
+        sqlOperations: SqlOperations,
+        namingResolver: StandardNameTransformer,
+        config: T,
+        catalog: ConfiguredAirbyteCatalog,
+        streamCopierFactory: StreamCopierFactory,
+        defaultSchema: String
+    ): AirbyteMessageConsumer {
+        val pairToCopier =
+            createWriteConfigs(
+                namingResolver,
+                config,
+                catalog,
+                streamCopierFactory,
+                defaultSchema,
+                database,
+                sqlOperations
+            )
+
+        val pairToIgnoredRecordCount: MutableMap = HashMap()
+        return BufferedStreamConsumer(
+            outputRecordCollector,
+            onStartFunction(pairToIgnoredRecordCount),
+            InMemoryRecordBufferingStrategy(
+                recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount),
+                removeStagingFilePrinter(pairToCopier),
+                GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES.toLong()
+            ),
+            onCloseFunction(
+                pairToCopier,
+                database,
+                sqlOperations,
+                pairToIgnoredRecordCount,
+                dataSource
+            ),
+            catalog
+        ) { data: JsonNode?
-> sqlOperations.isValidData(data) } + } + + private fun createWriteConfigs( + namingResolver: StandardNameTransformer, + config: T, + catalog: ConfiguredAirbyteCatalog, + streamCopierFactory: StreamCopierFactory, + defaultSchema: String, + database: JdbcDatabase, + sqlOperations: SqlOperations + ): Map { + val pairToCopier: MutableMap = HashMap() + val stagingFolder = UUID.randomUUID().toString() + for (configuredStream in catalog.streams) { + val stream = configuredStream.stream + val pair = AirbyteStreamNameNamespacePair.fromAirbyteStream(stream) + val copier = + streamCopierFactory.create( + defaultSchema, + config, + stagingFolder, + configuredStream, + namingResolver, + database, + sqlOperations + ) + + pairToCopier[pair] = copier + } + + return pairToCopier + } + + private fun onStartFunction( + pairToIgnoredRecordCount: MutableMap + ): OnStartFunction { + return OnStartFunction { pairToIgnoredRecordCount.clear() } + } + + private fun recordWriterFunction( + pairToCopier: Map, + sqlOperations: SqlOperations, + pairToIgnoredRecordCount: MutableMap + ): RecordWriter { + return RecordWriter { + pair: AirbyteStreamNameNamespacePair, + records: List -> + val fileName = pairToCopier[pair]!!.prepareStagingFile() + for (recordMessage in records) { + val id = UUID.randomUUID() + if (sqlOperations.isValidData(recordMessage.data)) { + // TODO Truncate json data instead of throwing whole record away? + // or should we upload it into a special rejected record folder in s3 instead? + pairToCopier[pair]!!.write(id, recordMessage, fileName) + } else { + pairToIgnoredRecordCount[pair] = + pairToIgnoredRecordCount.getOrDefault(pair, 0L) + 1L + } + } + } + } + + private fun removeStagingFilePrinter( + pairToCopier: Map + ): CheckAndRemoveRecordWriter { + return CheckAndRemoveRecordWriter { + pair: AirbyteStreamNameNamespacePair?, + stagingFileName: String? -> + val currentFileName = pairToCopier[pair]!!.currentFile + if ( + stagingFileName != null && + currentFileName != null && + stagingFileName != currentFileName + ) { + pairToCopier[pair]!!.closeNonCurrentStagingFileWriters() + } + currentFileName + } + } + + private fun onCloseFunction( + pairToCopier: Map, + database: JdbcDatabase, + sqlOperations: SqlOperations, + pairToIgnoredRecordCount: Map, + dataSource: DataSource + ): OnCloseFunction { + return OnCloseFunction { hasFailed: Boolean, _: Map? -> + pairToIgnoredRecordCount.forEach { (pair: AirbyteStreamNameNamespacePair?, count: Long?) + -> + LOGGER.warn( + "A total of {} record(s) of data from stream {} were invalid and were ignored.", + count, + pair + ) + } + closeAsOneTransaction(pairToCopier, hasFailed, database, sqlOperations, dataSource) + } + } + + @Throws(Exception::class) + private fun closeAsOneTransaction( + pairToCopier: Map, + hasFailed: Boolean, + db: JdbcDatabase, + sqlOperations: SqlOperations, + dataSource: DataSource + ) { + var hasFailed = hasFailed + var firstException: Exception? 
= null + val streamCopiers: List = ArrayList(pairToCopier.values) + try { + val queries: MutableList = ArrayList() + for (copier in streamCopiers) { + try { + copier!!.closeStagingUploader(hasFailed) + if (!hasFailed) { + copier.createDestinationSchema() + copier.createTemporaryTable() + copier.copyStagingFileToTemporaryTable() + val destTableName = copier.createDestinationTable() + val mergeQuery = copier.generateMergeStatement(destTableName) + queries.add(mergeQuery) + } + } catch (e: Exception) { + val message = + String.format("Failed to finalize copy to temp table due to: %s", e) + LOGGER.error(message) + hasFailed = true + if (firstException == null) { + firstException = e + } + } + } + if (!hasFailed) { + sqlOperations.executeTransaction(db, queries) + } + } finally { + for (copier in streamCopiers) { + copier!!.removeFileAndDropTmpTable() + } + + close(dataSource) + } + if (firstException != null) { + throw firstException + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt new file mode 100644 index 000000000000..f8f11b717d1c --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc.copy + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.db.factory.DataSourceFactory.close +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.BaseConnector +import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility.emitConfigErrorTrace +import io.airbyte.cdk.integrations.base.Destination +import io.airbyte.cdk.integrations.base.errors.messages.ErrorMessage.getErrorMessage +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer +import io.airbyte.cdk.integrations.destination.StandardNameTransformer +import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination +import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations +import io.airbyte.commons.exceptions.ConnectionErrorException +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus +import javax.sql.DataSource +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +abstract class CopyDestination : BaseConnector, Destination { + /** + * The default database schema field in the destination config is "schema". To change it, pass + * the field name to the constructor. + */ + private var schemaFieldName = "schema" + + constructor() + + constructor(schemaFieldName: String) { + this.schemaFieldName = schemaFieldName + } + + /** + * A self contained method for writing a file to the persistence for testing. This method should + * try to clean up after itself by deleting the file it creates. + */ + @Throws(Exception::class) abstract fun checkPersistence(config: JsonNode?) + + abstract val nameTransformer: StandardNameTransformer + + abstract fun getDataSource(config: JsonNode?): DataSource + + abstract fun getDatabase(dataSource: DataSource?): JdbcDatabase + + abstract val sqlOperations: SqlOperations + + override fun check(config: JsonNode): AirbyteConnectionStatus? 
{ + try { + checkPersistence(config) + } catch (e: Exception) { + LOGGER.error("Exception attempting to access the staging persistence: ", e) + return AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage( + """ + Could not connect to the staging persistence with the provided configuration. + ${e.message} + """.trimIndent() + ) + } + + val dataSource = getDataSource(config) + + try { + val database = getDatabase(dataSource) + val nameTransformer = nameTransformer + val outputSchema = nameTransformer.convertStreamName(config[schemaFieldName].asText()) + performCreateInsertTestOnDestination(outputSchema, database, nameTransformer) + + return AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED) + } catch (ex: ConnectionErrorException) { + LOGGER.info("Exception while checking connection: ", ex) + val message = getErrorMessage(ex.stateCode, ex.errorCode, ex.exceptionMessage, ex) + emitConfigErrorTrace(ex, message) + return AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage(message) + } catch (e: Exception) { + LOGGER.error("Exception attempting to connect to the warehouse: ", e) + return AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage( + """ + Could not connect to the warehouse with the provided configuration. + ${e.message} + """.trimIndent() + ) + } finally { + try { + close(dataSource) + } catch (e: Exception) { + LOGGER.warn("Unable to close data source.", e) + } + } + } + + @Throws(Exception::class) + protected fun performCreateInsertTestOnDestination( + outputSchema: String?, + database: JdbcDatabase, + nameTransformer: NamingConventionTransformer + ) { + AbstractJdbcDestination.Companion.attemptTableOperations( + outputSchema, + database, + nameTransformer, + sqlOperations, + true + ) + } + + companion object { + private val LOGGER: Logger = LoggerFactory.getLogger(CopyDestination::class.java) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt new file mode 100644 index 000000000000..0f23c527c2a3 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc.copy + +import com.fasterxml.jackson.databind.JsonNode +import com.google.common.base.Preconditions +import io.airbyte.cdk.integrations.BaseConnector +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer +import io.airbyte.cdk.integrations.base.Destination +import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import java.util.* +import java.util.function.Consumer +import java.util.function.Function +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +/** + * Multiple configs may allow you to sync data to the destination in multiple ways. 
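+ *
+ * A rough wiring sketch (the enum, the config predicate, and the two destination instances are
+ * illustrative assumptions, not part of this change):
+ * ```
+ * enum class Type { INSERT, COPY }
+ * val destination = SwitchingDestination(
+ *     Type::class.java,
+ *     { config -> if (config.has("staging")) Type.COPY else Type.INSERT },
+ *     mapOf(Type.INSERT to insertDestination, Type.COPY to copyDestination)
+ * )
+ * ```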
+ *
+ * One primary example: some DB-based destinations default to INSERT-based writes, but (given
+ * additional credentials) can instead sync data by copying files to a staging location.
+ *
+ * This class exists to make it easy to define a destination in terms of multiple other destination
+ * implementations, switching between them based on the config provided.
+ */
+class SwitchingDestination>(
+    enumClass: Class,
+    configToType: Function,
+    typeToDestination: Map
+) : BaseConnector(), Destination {
+    private val configToType: Function
+    private val typeToDestination: Map
+
+    init {
+        val allEnumConstants: Set = HashSet(Arrays.asList(*enumClass.enumConstants))
+        val supportedEnumConstants = typeToDestination.keys
+
+        // check that it isn't possible for configToType to produce something we can't handle
+        Preconditions.checkArgument(allEnumConstants == supportedEnumConstants)
+
+        this.configToType = configToType
+        this.typeToDestination = typeToDestination
+    }
+
+    @Throws(Exception::class)
+    override fun check(config: JsonNode): AirbyteConnectionStatus? {
+        val destinationType = configToType.apply(config)
+        LOGGER.info("Using destination type: " + destinationType!!.name)
+        return typeToDestination[destinationType]!!.check(config)
+    }
+
+    @Throws(Exception::class)
+    override fun getConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        outputRecordCollector: Consumer
+    ): AirbyteMessageConsumer? {
+        val destinationType = configToType.apply(config)
+        LOGGER.info("Using destination type: " + destinationType!!.name)
+        return typeToDestination[destinationType]!!.getConsumer(
+            config,
+            catalog,
+            outputRecordCollector
+        )
+    }
+
+    @Throws(Exception::class)
+    override fun getSerializedMessageConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        outputRecordCollector: Consumer
+    ): SerializedAirbyteMessageConsumer? {
+        val destinationType = configToType.apply(config)
+        LOGGER.info("Using destination type: " + destinationType!!.name)
+        return typeToDestination[destinationType]!!.getSerializedMessageConsumer(
+            config,
+            catalog,
+            outputRecordCollector
+        )
+    }
+
+    companion object {
+        private val LOGGER: Logger = LoggerFactory.getLogger(SwitchingDestination::class.java)
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt
new file mode 100644
index 000000000000..3ed79a50e1c1
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */ +package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.base.JavaBaseConstants +import io.airbyte.cdk.integrations.destination.jdbc.ColumnDefinition +import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition +import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil.getResultsOrLogAndThrowFirst +import io.airbyte.commons.concurrency.CompletableFutures +import io.airbyte.commons.exceptions.SQLRuntimeException +import io.airbyte.commons.functional.CheckedFunction +import io.airbyte.commons.json.Jsons +import io.airbyte.integrations.base.destination.typing_deduping.* +import io.airbyte.integrations.base.destination.typing_deduping.Struct +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair +import java.sql.* +import java.time.Instant +import java.time.OffsetDateTime +import java.time.temporal.ChronoUnit +import java.util.* +import java.util.concurrent.CompletableFuture +import java.util.concurrent.CompletionStage +import java.util.function.Function +import java.util.function.Predicate +import java.util.stream.Collectors +import kotlin.collections.LinkedHashMap +import lombok.extern.slf4j.Slf4j +import org.jooq.Condition +import org.jooq.DSLContext +import org.jooq.SQLDialect +import org.jooq.conf.ParamType +import org.jooq.impl.DSL +import org.jooq.impl.SQLDataType +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +@Slf4j +abstract class JdbcDestinationHandler( + protected val databaseName: String, + protected val jdbcDatabase: JdbcDatabase, + protected val rawTableSchemaName: String, + private val dialect: SQLDialect +) : DestinationHandler { + protected val dslContext: DSLContext + get() = DSL.using(dialect) + + @Throws(Exception::class) + private fun findExistingTable(id: StreamId): Optional { + return findExistingTable(jdbcDatabase, databaseName, id.finalNamespace, id.finalName) + } + + @Throws(Exception::class) + private fun isFinalTableEmpty(id: StreamId): Boolean { + return !jdbcDatabase.queryBoolean( + dslContext + .select( + DSL.field( + DSL.exists( + DSL.selectOne().from(DSL.name(id.finalNamespace, id.finalName)).limit(1) + ) + ) + ) + .getSQL(ParamType.INLINED) + ) + } + + @Throws(Exception::class) + private fun getInitialRawTableState(id: StreamId): InitialRawTableStatus { + val tableExists = + jdbcDatabase.executeMetadataQuery { dbmetadata: DatabaseMetaData? -> + LOGGER.info( + "Retrieving table from Db metadata: {} {} {}", + databaseName, + id.rawNamespace, + id.rawName + ) + try { + dbmetadata!!.getTables(databaseName, id.rawNamespace, id.rawName, null).use { + table -> + return@executeMetadataQuery table.next() + } + } catch (e: SQLException) { + LOGGER.error("Failed to retrieve table info from metadata", e) + throw SQLRuntimeException(e) + } + } + if (!tableExists) { + // There's no raw table at all. Therefore there are no unprocessed raw records, and this + // sync + // should not filter raw records by timestamp. 
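+            // (InitialRawTableStatus arguments: raw table exists, unprocessed records present,
+            // and an optional extracted_at watermark - empty here since there is nothing to
+            // filter.)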
+ return InitialRawTableStatus(false, false, Optional.empty()) + } + jdbcDatabase + .unsafeQuery( + CheckedFunction { conn: Connection -> + conn.prepareStatement( + dslContext + .select(DSL.field("MIN(_airbyte_extracted_at)").`as`("min_timestamp")) + .from(DSL.name(id.rawNamespace, id.rawName)) + .where(DSL.condition("_airbyte_loaded_at IS NULL")) + .sql + ) + }, + CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") } + ) + .use { timestampStream -> + // Filter for nonNull values in case the query returned NULL (i.e. no unloaded + // records). + val minUnloadedTimestamp: Optional = + timestampStream + .filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }) + .findFirst() + if (minUnloadedTimestamp.isPresent) { + // Decrement by 1 second since timestamp precision varies between databases. + val ts = + minUnloadedTimestamp + .map { obj: Timestamp -> obj.toInstant() } + .map { i: Instant -> i.minus(1, ChronoUnit.SECONDS) } + return InitialRawTableStatus(true, true, ts) + } + } + jdbcDatabase + .unsafeQuery( + CheckedFunction { conn: Connection -> + conn.prepareStatement( + dslContext + .select(DSL.field("MAX(_airbyte_extracted_at)").`as`("min_timestamp")) + .from(DSL.name(id.rawNamespace, id.rawName)) + .sql + ) + }, + CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") } + ) + .use { timestampStream -> + // Filter for nonNull values in case the query returned NULL (i.e. no raw records at + // all). + val minUnloadedTimestamp: Optional = + timestampStream + .filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }) + .findFirst() + return InitialRawTableStatus( + true, + false, + minUnloadedTimestamp.map { obj: Timestamp -> obj.toInstant() } + ) + } + } + + @Throws(Exception::class) + override fun execute(sql: Sql) { + val transactions: List> = sql.transactions + val queryId = UUID.randomUUID() + for (transaction in transactions) { + val transactionId = UUID.randomUUID() + LOGGER.info( + "Executing sql {}-{}: {}", + queryId, + transactionId, + java.lang.String.join("\n", transaction) + ) + val startTime = System.currentTimeMillis() + + try { + jdbcDatabase.executeWithinTransaction(transaction) + } catch (e: SQLException) { + LOGGER.error("Sql {}-{} failed", queryId, transactionId, e) + throw e + } + + LOGGER.info( + "Sql {}-{} completed in {} ms", + queryId, + transactionId, + System.currentTimeMillis() - startTime + ) + } + } + + @Throws(Exception::class) + override fun gatherInitialState( + streamConfigs: List + ): List> { + // Use stream n/ns pair because we don't want to build the full StreamId here + val destinationStatesFuture = + CompletableFuture.supplyAsync { + try { + return@supplyAsync allDestinationStates + } catch (e: SQLException) { + throw RuntimeException(e) + } + } + + val initialStates = + streamConfigs + .stream() + .map { streamConfig: StreamConfig -> + retrieveState(destinationStatesFuture, streamConfig) + } + .toList() + val states = CompletableFutures.allOf(initialStates).toCompletableFuture().join() + return getResultsOrLogAndThrowFirst("Failed to retrieve initial state", states) + } + + @get:Throws(SQLException::class) + protected val allDestinationStates: Map + get() { + // Guarantee the table exists. 
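+            // The table created below has four columns: name VARCHAR, namespace VARCHAR,
+            // destination_state VARCHAR, and updated_at TIMESTAMPTZ.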
+ jdbcDatabase.execute( + dslContext + .createTableIfNotExists( + DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME) + ) + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), + SQLDataType.VARCHAR + ) + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), + SQLDataType.VARCHAR + ) // Just use a string type, even if the destination has a json type. + // We're never going to query this column in a fancy way - all our processing + // can happen + // client-side. + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), + SQLDataType.VARCHAR + ) // Add an updated_at field. We don't actually need it yet, but it can't hurt! + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), + SQLDataType.TIMESTAMPWITHTIMEZONE + ) + .getSQL(ParamType.INLINED) + ) + // Fetch all records from it. We _could_ filter down to just our streams... but meh. + // This is small + // data. + return jdbcDatabase + .queryJsons( + dslContext + .select( + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)), + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE)), + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE)) + ) + .from(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + .sql + ) + .stream() + .collect( + Collectors.toMap( + Function { record: JsonNode -> + val nameNode = record[DESTINATION_STATE_TABLE_COLUMN_NAME] + val namespaceNode = record[DESTINATION_STATE_TABLE_COLUMN_NAMESPACE] + AirbyteStreamNameNamespacePair( + nameNode?.asText(), + namespaceNode?.asText() + ) + }, + Function { record: JsonNode -> + val stateNode = record[DESTINATION_STATE_TABLE_COLUMN_STATE] + val state = + if (stateNode != null) Jsons.deserialize(stateNode.asText()) + else Jsons.emptyObject() + toDestinationState(state) + } + ) + ) + } + + private fun retrieveState( + destinationStatesFuture: + CompletableFuture>, + streamConfig: StreamConfig? + ): CompletionStage> { + return destinationStatesFuture.thenApply { + destinationStates: Map -> + try { + val finalTableDefinition = findExistingTable(streamConfig!!.id) + val isSchemaMismatch: Boolean + val isFinalTableEmpty: Boolean + if (finalTableDefinition.isPresent) { + isSchemaMismatch = + !existingSchemaMatchesStreamConfig(streamConfig, finalTableDefinition.get()) + isFinalTableEmpty = isFinalTableEmpty(streamConfig.id) + } else { + // If the final table doesn't exist, then by definition it doesn't have a schema + // mismatch and has no + // records. 
+ isSchemaMismatch = false + isFinalTableEmpty = true + } + val initialRawTableState = getInitialRawTableState(streamConfig.id) + val destinationState = + destinationStates.getOrDefault( + streamConfig.id.asPair(), + toDestinationState(Jsons.emptyObject()) + ) + return@thenApply DestinationInitialStatus( + streamConfig, + finalTableDefinition.isPresent, + initialRawTableState, + isSchemaMismatch, + isFinalTableEmpty, + destinationState + ) + } catch (e: Exception) { + throw RuntimeException(e) + } + } + } + + private fun isAirbyteRawIdColumnMatch(existingTable: TableDefinition): Boolean { + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID) && + toJdbcTypeName(AirbyteProtocolType.STRING) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_RAW_ID]!!.type + } + + private fun isAirbyteExtractedAtColumnMatch(existingTable: TableDefinition): Boolean { + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT) && + toJdbcTypeName(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT]!!.type + } + + private fun isAirbyteMetaColumnMatch(existingTable: TableDefinition): Boolean { + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_META) && + toJdbcTypeName(Struct(java.util.LinkedHashMap())) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_META]!!.type + } + + protected fun existingSchemaMatchesStreamConfig( + stream: StreamConfig?, + existingTable: TableDefinition + ): Boolean { + // Check that the columns match, with special handling for the metadata columns. + if ( + !isAirbyteRawIdColumnMatch(existingTable) || + !isAirbyteExtractedAtColumnMatch(existingTable) || + !isAirbyteMetaColumnMatch(existingTable) + ) { + // Missing AB meta columns from final table, we need them to do proper T+D so trigger + // soft-reset + return false + } + val intendedColumns = + LinkedHashMap( + stream!!.columns!!.entries.associate { it.key.name to toJdbcTypeName(it.value) } + ) + + // Filter out Meta columns since they don't exist in stream config. + val actualColumns = + existingTable.columns.entries + .stream() + .filter { column: Map.Entry -> + JavaBaseConstants.V2_FINAL_TABLE_METADATA_COLUMNS.stream() + .noneMatch( + Predicate { airbyteColumnName: String -> + airbyteColumnName == column.key + } + ) + } + .collect( + { LinkedHashMap() }, + { + map: java.util.LinkedHashMap, + column: Map.Entry -> + map[column.key] = column.value.type + }, + { + obj: java.util.LinkedHashMap, + m: java.util.LinkedHashMap? -> + obj.putAll(m!!) + } + ) + + return actualColumns == intendedColumns + } + + @Throws(Exception::class) + override fun commitDestinationStates(destinationStates: Map) { + if (destinationStates.isEmpty()) { + return + } + + // Delete all state records where the stream name+namespace match one of our states + val deleteStates = + dslContext + .deleteFrom( + DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + ) + .where( + destinationStates.keys + .stream() + .map { streamId: StreamId -> + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)) + .eq(streamId.originalName) + .and( + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE) + ) + .eq(streamId.originalNamespace) + ) + } + .reduce(DSL.falseCondition()) { obj: Condition, arg2: Condition? 
-> + obj.or(arg2) + } + ) + .getSQL(ParamType.INLINED) + + // Reinsert all of our states + var insertStatesStep = + dslContext + .insertInto( + DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + ) + .columns( + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), + String::class.java + ), + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), + String::class.java + ), + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), + String::class.java + ), // This field is a timestamptz, but it's easier to just insert a string + // and assume the destination can cast it appropriately. + // Destination-specific timestamp syntax is weird and annoying. + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), + String::class.java + ) + ) + for ((streamId, value) in destinationStates) { + val stateJson = Jsons.serialize(value) + insertStatesStep = + insertStatesStep.values( + streamId!!.originalName, + streamId.originalNamespace, + stateJson, + OffsetDateTime.now().toString() + ) + } + val insertStates = insertStatesStep.getSQL(ParamType.INLINED) + + jdbcDatabase.executeWithinTransaction(java.util.List.of(deleteStates, insertStates)) + } + + /** + * Convert to the TYPE_NAME retrieved from [java.sql.DatabaseMetaData.getColumns] + * + * @param airbyteType + * @return + */ + protected abstract fun toJdbcTypeName(airbyteType: AirbyteType?): String + + protected abstract fun toDestinationState(json: JsonNode?): DestinationState + + companion object { + private val LOGGER: Logger = LoggerFactory.getLogger(JdbcDestinationHandler::class.java) + private const val DESTINATION_STATE_TABLE_NAME = "_airbyte_destination_state" + private const val DESTINATION_STATE_TABLE_COLUMN_NAME = "name" + private const val DESTINATION_STATE_TABLE_COLUMN_NAMESPACE = "namespace" + private const val DESTINATION_STATE_TABLE_COLUMN_STATE = "destination_state" + private const val DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT = "updated_at" + + @Throws(SQLException::class) + fun findExistingTable( + jdbcDatabase: JdbcDatabase, + databaseName: String?, + schemaName: String?, + tableName: String? + ): Optional { + val retrievedColumnDefns = + jdbcDatabase.executeMetadataQuery { dbMetadata: DatabaseMetaData? -> + + // TODO: normalize namespace and finalName strings to quoted-lowercase (as + // needed. 
Snowflake + // requires uppercase) + val columnDefinitions = java.util.LinkedHashMap() + LOGGER.info( + "Retrieving existing columns for {}.{}.{}", + databaseName, + schemaName, + tableName + ) + try { + dbMetadata!!.getColumns(databaseName, schemaName, tableName, null).use { + columns -> + while (columns.next()) { + val columnName = columns.getString("COLUMN_NAME") + val typeName = columns.getString("TYPE_NAME") + val columnSize = columns.getInt("COLUMN_SIZE") + val isNullable = columns.getString("IS_NULLABLE") + columnDefinitions[columnName] = + ColumnDefinition( + columnName, + typeName, + columnSize, + fromIsNullableIsoString(isNullable) + ) + } + } + } catch (e: SQLException) { + LOGGER.error( + "Failed to retrieve column info for {}.{}.{}", + databaseName, + schemaName, + tableName, + e + ) + throw SQLRuntimeException(e) + } + columnDefinitions + } + // Guard to fail fast + if (retrievedColumnDefns.isEmpty()) { + return Optional.empty() + } + + return Optional.of(TableDefinition(retrievedColumnDefns)) + } + + fun fromIsNullableIsoString(isNullable: String?): Boolean { + return "YES".equals(isNullable, ignoreCase = true) + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt new file mode 100644 index 000000000000..060e0b484f0c --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping + +import com.google.common.annotations.VisibleForTesting +import io.airbyte.cdk.integrations.base.JavaBaseConstants +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer +import io.airbyte.integrations.base.destination.typing_deduping.* +import io.airbyte.integrations.base.destination.typing_deduping.Array +import io.airbyte.integrations.base.destination.typing_deduping.Sql.Companion.of +import io.airbyte.integrations.base.destination.typing_deduping.Sql.Companion.transactionally +import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName +import io.airbyte.protocol.models.v0.DestinationSyncMode +import java.sql.Timestamp +import java.time.Instant +import java.util.* +import java.util.stream.Collectors +import java.util.stream.Stream +import kotlin.Any +import kotlin.Boolean +import kotlin.IllegalArgumentException +import kotlin.Int +import kotlin.String +import kotlin.UnsupportedOperationException +import kotlin.plus +import org.jooq.* +import org.jooq.conf.ParamType +import org.jooq.impl.DSL +import org.jooq.impl.SQLDataType + +abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventionTransformer) : + SqlGenerator { + protected val cdcDeletedAtColumn: ColumnId = buildColumnId("_ab_cdc_deleted_at") + + override fun buildStreamId( + namespace: String, + name: String, + rawNamespaceOverride: String + ): StreamId { + return StreamId( + namingTransformer.getNamespace(namespace), + namingTransformer.convertStreamName(name), + namingTransformer.getNamespace(rawNamespaceOverride), + namingTransformer.convertStreamName(concatenateRawTableName(namespace, name)), + namespace, + name + ) + } + + override fun 
buildColumnId(name: String, suffix: String?): ColumnId {
+        val nameWithSuffix = name + suffix
+        return ColumnId(
+            namingTransformer.getIdentifier(nameWithSuffix),
+            name,
+            namingTransformer.getIdentifier(nameWithSuffix)
+        )
+    }
+
+    protected fun toDialectType(type: AirbyteType): DataType<*> {
+        if (type is AirbyteProtocolType) {
+            return toDialectType(type)
+        }
+        return when (type.typeName) {
+            Struct.TYPE,
+            UnsupportedOneOf.TYPE -> structType
+            Array.TYPE -> arrayType!!
+            Union.TYPE -> toDialectType((type as Union).chooseType())
+            else -> throw IllegalArgumentException("Unsupported AirbyteType: $type")
+        }
+    }
+
+    @VisibleForTesting
+    fun toDialectType(airbyteProtocolType: AirbyteProtocolType): DataType<*> {
+        return when (airbyteProtocolType) {
+            AirbyteProtocolType.STRING -> SQLDataType.VARCHAR(65535)
+            AirbyteProtocolType.NUMBER -> SQLDataType.DECIMAL(38, 9)
+            AirbyteProtocolType.INTEGER -> SQLDataType.BIGINT
+            AirbyteProtocolType.BOOLEAN -> SQLDataType.BOOLEAN
+            AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE -> SQLDataType.TIMESTAMPWITHTIMEZONE
+            AirbyteProtocolType.TIMESTAMP_WITHOUT_TIMEZONE -> SQLDataType.TIMESTAMP
+            AirbyteProtocolType.TIME_WITH_TIMEZONE -> SQLDataType.TIMEWITHTIMEZONE
+            AirbyteProtocolType.TIME_WITHOUT_TIMEZONE -> SQLDataType.TIME
+            AirbyteProtocolType.DATE -> SQLDataType.DATE
+            AirbyteProtocolType.UNKNOWN -> widestType!!
+        }
+    }
+
+    protected abstract val structType: DataType<*>
+        get
+
+    protected abstract val arrayType: DataType<*>?
+        get
+
+    @get:VisibleForTesting
+    val timestampWithTimeZoneType: DataType<*>
+        get() = toDialectType(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE)
+
+    protected abstract val widestType: DataType<*>?
+        get
+
+    protected abstract val dialect: SQLDialect?
+        get
+
+    /**
+     * @param columns from the schema to be extracted from the _airbyte_data column. Use the
+     * destination-specific syntax to extract data
+     * @param useExpensiveSaferCasting
+     * @return a list of jooq fields for the final table insert statement.
+     */
+    protected abstract fun extractRawDataFields(
+        columns: LinkedHashMap,
+        useExpensiveSaferCasting: Boolean
+    ): MutableList>
+
+    /**
+     *
+     * @param columns from the schema to be used for type casting errors and to construct the
+     * _airbyte_meta column
+     * @return
+     */
+    protected abstract fun buildAirbyteMetaColumn(
+        columns: LinkedHashMap
+    ): Field<*>?
+
+    /**
+     * Get the cdc_deleted_at column condition for append_dedup mode by extracting it from the
+     * _airbyte_data column in the raw table.
+     *
+     * @return
+     */
+    protected abstract fun cdcDeletedAtNotNullCondition(): Condition?
+
+    /**
+     * Get the window step function row_number() over (partition by primary_key order by
+     * cursor_field) as row_number.
+     *
+     * @param primaryKey list of primary keys
+     * @param cursorField cursor field used for ordering
+     * @return
+     */
+    protected abstract fun getRowNumber(
+        primaryKey: List?,
+        cursorField: Optional
+    ): Field
+
+    protected val dslContext: DSLContext
+        get() = DSL.using(dialect)
+
+    /**
+     * Build jooq fields for the final table, with customer columns first and then meta columns.
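+     *
+     * For example (illustrative): columns id and name with the V2 meta columns yield fields in
+     * the order "id", "name", "_airbyte_raw_id", "_airbyte_extracted_at", "_airbyte_meta".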
+ * + * @param columns + * @param metaColumns + * @return + */ + @VisibleForTesting + fun buildFinalTableFields( + columns: LinkedHashMap, + metaColumns: Map?> + ): List> { + val fields = + metaColumns.entries + .stream() + .map { metaColumn: Map.Entry?> -> + DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) + } + .collect(Collectors.toList()) + val dataFields = + columns.entries + .stream() + .map { column: Map.Entry -> + DSL.field(DSL.quotedName(column.key!!.name), toDialectType(column.value)) + } + .collect(Collectors.toList()) + dataFields.addAll(fields) + return dataFields + } + + /** + * Use this method to get the final table meta columns with or without _airbyte_meta column. + * + * @param includeMetaColumn + * @return + */ + fun getFinalTableMetaColumns(includeMetaColumn: Boolean): LinkedHashMap?> { + val metaColumns = LinkedHashMap?>() + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_RAW_ID] = + SQLDataType.VARCHAR(36).nullable(false) + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT] = + timestampWithTimeZoneType.nullable(false) + if (includeMetaColumn) + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_META] = structType.nullable(false) + return metaColumns + } + + /** + * build jooq fields for raw table with type-casted data columns first and then meta columns + * without _airbyte_meta. + * + * @param columns + * @param metaColumns + * @return + */ + @VisibleForTesting + fun buildRawTableSelectFields( + columns: LinkedHashMap, + metaColumns: Map?>, + useExpensiveSaferCasting: Boolean + ): List> { + val fields = + metaColumns.entries + .stream() + .map { metaColumn: Map.Entry?> -> + DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) + } + .collect(Collectors.toList()) + // Use originalName with non-sanitized characters when extracting data from _airbyte_data + val dataFields = extractRawDataFields(columns, useExpensiveSaferCasting) + dataFields.addAll(fields) + return dataFields + } + + @VisibleForTesting + fun rawTableCondition( + syncMode: DestinationSyncMode, + isCdcDeletedAtPresent: Boolean, + minRawTimestamp: Optional + ): Condition { + var condition: Condition = + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)).isNull() + if (syncMode == DestinationSyncMode.APPEND_DEDUP) { + if (isCdcDeletedAtPresent) { + condition = condition.or(cdcDeletedAtNotNullCondition()) + } + } + if (minRawTimestamp.isPresent) { + condition = + condition.and( + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)) + .gt(minRawTimestamp.get().toString()) + ) + } + return condition + } + + override fun createSchema(schema: String?): Sql { + return of(createSchemaSql(schema)) + } + + override fun createTable(stream: StreamConfig, suffix: String, force: Boolean): Sql { + // TODO: Use Naming transformer to sanitize these strings with redshift restrictions. + val finalTableIdentifier = stream.id.finalName + suffix.lowercase(Locale.getDefault()) + if (!force) { + return transactionally( + Stream.concat( + Stream.of( + createTableSql( + stream.id.finalNamespace, + finalTableIdentifier, + stream.columns!! + ) + ), + createIndexSql(stream, suffix).stream() + ) + .toList() + ) + } + return transactionally( + Stream.concat( + Stream.of( + DSL.dropTableIfExists( + DSL.quotedName(stream.id.finalNamespace, finalTableIdentifier) + ) + .getSQL(ParamType.INLINED), + createTableSql( + stream.id.finalNamespace, + finalTableIdentifier, + stream.columns!! 
+ ) + ), + createIndexSql(stream, suffix).stream() + ) + .toList() + ) + } + + override fun updateTable( + streamConfig: StreamConfig, + finalSuffix: String?, + minRawTimestamp: Optional, + useExpensiveSaferCasting: Boolean + ): Sql { + // TODO: Add flag to use merge vs insert/delete + + return insertAndDeleteTransaction( + streamConfig, + finalSuffix, + minRawTimestamp, + useExpensiveSaferCasting + ) + } + + override fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql { + return transactionally( + DSL.dropTableIfExists(DSL.name(stream.finalNamespace, stream.finalName)) + .getSQL(ParamType.INLINED), + DSL.alterTable(DSL.name(stream.finalNamespace, stream.finalName + finalSuffix)) + .renameTo(DSL.name(stream.finalName)) + .sql + ) + } + + override fun migrateFromV1toV2( + streamId: StreamId, + namespace: String?, + tableName: String? + ): Sql { + val rawTableName = DSL.name(streamId.rawNamespace, streamId.rawName) + val dsl = dslContext + return transactionally( + dsl.createSchemaIfNotExists(streamId.rawNamespace).sql, + dsl.dropTableIfExists(rawTableName).sql, + DSL.createTable(rawTableName) + .column( + JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, + SQLDataType.VARCHAR(36).nullable(false) + ) + .column( + JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, + timestampWithTimeZoneType.nullable(false) + ) + .column( + JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT, + timestampWithTimeZoneType.nullable(true) + ) + .column(JavaBaseConstants.COLUMN_NAME_DATA, structType.nullable(false)) + .column(JavaBaseConstants.COLUMN_NAME_AB_META, structType.nullable(true)) + .`as`( + DSL.select( + DSL.field(JavaBaseConstants.COLUMN_NAME_AB_ID) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID), + DSL.field(JavaBaseConstants.COLUMN_NAME_EMITTED_AT) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT), + DSL.cast(null, timestampWithTimeZoneType) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT), + DSL.field(JavaBaseConstants.COLUMN_NAME_DATA) + .`as`(JavaBaseConstants.COLUMN_NAME_DATA), + DSL.cast(null, structType).`as`(JavaBaseConstants.COLUMN_NAME_AB_META) + ) + .from(DSL.table(DSL.name(namespace, tableName))) + ) + .getSQL(ParamType.INLINED) + ) + } + + override fun clearLoadedAt(streamId: StreamId): Sql { + return of( + DSL.update(DSL.table(DSL.name(streamId.rawNamespace, streamId.rawName))) + .set( + DSL.field(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT), + DSL.inline(null as String?) 
+                )
+                .sql
+        )
+    }
+
+    @VisibleForTesting
+    fun selectFromRawTable(
+        schemaName: String?,
+        tableName: String?,
+        columns: LinkedHashMap<ColumnId, AirbyteType>,
+        metaColumns: Map<String, DataType<*>?>,
+        condition: Condition?,
+        useExpensiveSaferCasting: Boolean
+    ): SelectConditionStep<Record> {
+        val dsl = dslContext
+        return dsl.select(buildRawTableSelectFields(columns, metaColumns, useExpensiveSaferCasting))
+            .select(buildAirbyteMetaColumn(columns))
+            .from(DSL.table(DSL.quotedName(schemaName, tableName)))
+            .where(condition)
+    }
+
+    @VisibleForTesting
+    fun insertIntoFinalTable(
+        schemaName: String?,
+        tableName: String?,
+        columns: LinkedHashMap<ColumnId, AirbyteType>,
+        metaFields: Map<String, DataType<*>?>
+    ): InsertValuesStepN<Record> {
+        val dsl = dslContext
+        return dsl.insertInto(DSL.table(DSL.quotedName(schemaName, tableName)))
+            .columns(buildFinalTableFields(columns, metaFields))
+    }
+
+    private fun insertAndDeleteTransaction(
+        streamConfig: StreamConfig,
+        finalSuffix: String?,
+        minRawTimestamp: Optional<Instant>,
+        useExpensiveSaferCasting: Boolean
+    ): Sql {
+        val finalSchema = streamConfig.id.finalNamespace
+        val finalTable =
+            streamConfig.id.finalName + (finalSuffix?.lowercase(Locale.getDefault()) ?: "")
+        val rawSchema = streamConfig.id.rawNamespace
+        val rawTable = streamConfig.id.rawName
+
+        // Poor person's guarantee of field ordering: the same ordered list of columns is used to
+        // generate both sets of fields.
+        val rawTableRowsWithCast =
+            DSL.name(TYPING_CTE_ALIAS)
+                .`as`(
+                    selectFromRawTable(
+                        rawSchema,
+                        rawTable,
+                        streamConfig.columns!!,
+                        getFinalTableMetaColumns(false),
+                        rawTableCondition(
+                            streamConfig.destinationSyncMode!!,
+                            streamConfig.columns!!.containsKey(cdcDeletedAtColumn),
+                            minRawTimestamp
+                        ),
+                        useExpensiveSaferCasting
+                    )
+                )
+        val finalTableFields =
+            buildFinalTableFields(streamConfig.columns!!, getFinalTableMetaColumns(true))
+        val rowNumber = getRowNumber(streamConfig.primaryKey, streamConfig.cursor!!)
+        val filteredRows =
+            DSL.name(NUMBERED_ROWS_CTE_ALIAS)
+                .`as`(DSL.select(DSL.asterisk(), rowNumber).from(rawTableRowsWithCast))
+
+        // Used for append-dedupe mode.
+        val insertStmtWithDedupe =
+            insertIntoFinalTable(
+                    finalSchema,
+                    finalTable,
+                    streamConfig.columns!!,
+                    getFinalTableMetaColumns(true)
+                )
+                .select(
+                    DSL.with(rawTableRowsWithCast)
+                        .with(filteredRows)
+                        .select(finalTableFields)
+                        .from(filteredRows)
+                        .where(
+                            DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME), Int::class.java).eq(1)
+                        ) // Could refer to the field via the CTE, but there's no benefit since we
+                        // don't strongly type them.
+                )
+                .getSQL(ParamType.INLINED)
+
+        // Used for append and overwrite modes.
+        val insertStmt =
+            insertIntoFinalTable(
+                    finalSchema,
+                    finalTable,
+                    streamConfig.columns!!,
+                    getFinalTableMetaColumns(true)
+                )
+                .select(
+                    DSL.with(rawTableRowsWithCast)
+                        .select(finalTableFields)
+                        .from(rawTableRowsWithCast)
+                )
+                .getSQL(ParamType.INLINED)
+        val deleteStmt =
+            deleteFromFinalTable(
+                finalSchema,
+                finalTable,
+                streamConfig.primaryKey!!,
+                streamConfig.cursor!!
+ ) + val deleteCdcDeletesStmt = + if (streamConfig.columns!!.containsKey(cdcDeletedAtColumn)) + deleteFromFinalTableCdcDeletes(finalSchema, finalTable) + else "" + val checkpointStmt = checkpointRawTable(rawSchema, rawTable, minRawTimestamp) + + if (streamConfig.destinationSyncMode != DestinationSyncMode.APPEND_DEDUP) { + return transactionally(insertStmt, checkpointStmt) + } + + // For append-dedupe + return transactionally( + insertStmtWithDedupe, + deleteStmt, + deleteCdcDeletesStmt, + checkpointStmt + ) + } + + private fun mergeTransaction( + streamConfig: StreamConfig, + finalSuffix: String, + minRawTimestamp: Optional, + useExpensiveSaferCasting: Boolean + ): String { + throw UnsupportedOperationException("Not implemented yet") + } + + protected fun createSchemaSql(namespace: String?): String { + val dsl = dslContext + val createSchemaSql = dsl.createSchemaIfNotExists(DSL.quotedName(namespace)) + return createSchemaSql.sql + } + + protected fun createTableSql( + namespace: String?, + tableName: String?, + columns: LinkedHashMap + ): String { + val dsl = dslContext + val createTableSql = + dsl.createTable(DSL.quotedName(namespace, tableName)) + .columns(buildFinalTableFields(columns, getFinalTableMetaColumns(true))) + return createTableSql.sql + } + + /** + * Subclasses may override this method to add additional indexes after their CREATE TABLE + * statement. This is useful if the destination's CREATE TABLE statement does not accept an + * index definition. + */ + protected fun createIndexSql(stream: StreamConfig?, suffix: String?): List { + return emptyList() + } + + protected fun beginTransaction(): String { + return "BEGIN" + } + + protected fun commitTransaction(): String { + return "COMMIT" + } + + private fun commitTransactionInternal(): String { + return commitTransaction() + ";" + } + + private fun deleteFromFinalTable( + schemaName: String?, + tableName: String, + primaryKeys: List, + cursor: Optional + ): String { + val dsl = dslContext + // Unknown type doesn't play well with where .. in (select..) 
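+        // For intuition (not part of this change): on a typical dialect the statement
+        // built below renders roughly as
+        //   DELETE FROM "schema"."final_table" WHERE "_airbyte_raw_id" IN (
+        //     SELECT "_airbyte_raw_id" FROM (
+        //       SELECT "_airbyte_raw_id", ROW_NUMBER() OVER (
+        //         PARTITION BY <primary key> ORDER BY <cursor>, ...) AS row_number
+        //       FROM "schema"."final_table") AS airbyte_ids
+        //     WHERE row_number <> 1)
+        // i.e. every row that is not the first row of its primary-key partition, per
+        // getRowNumber's ordering, gets deleted. Exact SQL depends on the dialect.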
+ val airbyteRawId: Field = + DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID)) + val rowNumber = getRowNumber(primaryKeys, cursor) + return dsl.deleteFrom(DSL.table(DSL.quotedName(schemaName, tableName))) + .where( + airbyteRawId.`in`( + DSL.select(airbyteRawId) + .from( + DSL.select(airbyteRawId, rowNumber) + .from(DSL.table(DSL.quotedName(schemaName, tableName))) + .asTable("airbyte_ids") + ) + .where(DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME)).ne(1)) + ) + ) + .getSQL(ParamType.INLINED) + } + + private fun deleteFromFinalTableCdcDeletes(schema: String?, tableName: String): String { + val dsl = dslContext + return dsl.deleteFrom(DSL.table(DSL.quotedName(schema, tableName))) + .where(DSL.field(DSL.quotedName(cdcDeletedAtColumn.name)).isNotNull()) + .getSQL(ParamType.INLINED) + } + + private fun checkpointRawTable( + schemaName: String?, + tableName: String?, + minRawTimestamp: Optional + ): String { + val dsl = dslContext + var extractedAtCondition = DSL.noCondition() + if (minRawTimestamp.isPresent) { + extractedAtCondition = + extractedAtCondition.and( + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)) + .gt(minRawTimestamp.get().toString()) + ) + } + return dsl.update(DSL.table(DSL.quotedName(schemaName, tableName))) + .set( + DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)), + currentTimestamp() + ) + .where(DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)).isNull()) + .and(extractedAtCondition) + .getSQL(ParamType.INLINED) + } + + protected fun castedField( + field: Field<*>?, + type: AirbyteType, + alias: String?, + useExpensiveSaferCasting: Boolean + ): Field<*> { + if (type is AirbyteProtocolType) { + return castedField(field, type, useExpensiveSaferCasting).`as`(DSL.quotedName(alias)) + } + + // Redshift SUPER can silently cast an array type to struct and vice versa. + return when (type.typeName) { + Struct.TYPE, + UnsupportedOneOf.TYPE -> DSL.cast(field, structType).`as`(DSL.quotedName(alias)) + Array.TYPE -> DSL.cast(field, arrayType).`as`(DSL.quotedName(alias)) + Union.TYPE -> + castedField(field, (type as Union).chooseType(), alias, useExpensiveSaferCasting) + else -> throw IllegalArgumentException("Unsupported AirbyteType: $type") + } + } + + protected fun castedField( + field: Field<*>?, + type: AirbyteProtocolType, + useExpensiveSaferCasting: Boolean + ): Field<*> { + return DSL.cast(field, toDialectType(type)) + } + + protected fun currentTimestamp(): Field { + return DSL.currentTimestamp() + } + + companion object { + protected const val ROW_NUMBER_COLUMN_NAME: String = "row_number" + private const val TYPING_CTE_ALIAS = "intermediate_data" + private const val NUMBERED_ROWS_CTE_ALIAS = "numbered_rows" + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt new file mode 100644 index 000000000000..d635050fe271 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping
+
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
+import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition
+import io.airbyte.commons.exceptions.SQLRuntimeException
+import io.airbyte.integrations.base.destination.typing_deduping.BaseDestinationV1V2Migrator
+import io.airbyte.integrations.base.destination.typing_deduping.NamespacedTableName
+import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
+import java.sql.DatabaseMetaData
+import java.sql.SQLException
+import java.util.*
+import lombok.SneakyThrows
+
+/**
+ * Largely based on
+ * [io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator].
+ */
+class JdbcV1V2Migrator(
+    private val namingConventionTransformer: NamingConventionTransformer,
+    private val database: JdbcDatabase,
+    private val databaseName: String
+) : BaseDestinationV1V2Migrator<TableDefinition>() {
+    @SneakyThrows
+    override fun doesAirbyteInternalNamespaceExist(streamConfig: StreamConfig?): Boolean {
+        val retrievedSchema =
+            database.executeMetadataQuery { dbMetadata: DatabaseMetaData? ->
+                try {
+                    dbMetadata!!.getSchemas(databaseName, streamConfig!!.id.rawNamespace).use {
+                        columns ->
+                        var schema = ""
+                        while (columns.next()) {
+                            // Catalog can be null, so don't do anything with it.
+                            // columns.getString("TABLE_CATALOG");
+                            schema = columns.getString("TABLE_SCHEM")
+                        }
+                        return@executeMetadataQuery schema
+                    }
+                } catch (e: SQLException) {
+                    throw SQLRuntimeException(e)
+                }
+            }
+
+        return !retrievedSchema.isEmpty()
+    }
+
+    override fun schemaMatchesExpectation(
+        existingTable: TableDefinition,
+        columns: Collection<String>
+    ): Boolean {
+        return existingTable.columns.keys.containsAll(columns)
+    }
+
+    @SneakyThrows
+    @Throws(Exception::class)
+    override fun getTableIfExists(
+        namespace: String?,
+        tableName: String?
+    ): Optional<TableDefinition> {
+        return JdbcDestinationHandler.Companion.findExistingTable(
+            database,
+            databaseName,
+            namespace,
+            tableName
+        )
+    }
+
+    override fun convertToV1RawName(streamConfig: StreamConfig): NamespacedTableName {
+        @Suppress("deprecation")
+        val tableName = namingConventionTransformer.getRawTableName(streamConfig.id.originalName!!)
+        return NamespacedTableName(
+            namingConventionTransformer.getIdentifier(streamConfig.id.originalNamespace!!),
+            tableName
+        )
+    }
+}
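
For orientation, the migrator is constructed by a destination and handed to its TyperDeduper. A hedged wiring sketch; the transformer, the database handle and the database name below are placeholders, not values taken from this change:

    // Illustrative wiring only; jdbcDatabase is assumed to exist at the call site.
    val migrator = JdbcV1V2Migrator(
        namingConventionTransformer = StandardNameTransformer(),
        database = jdbcDatabase,
        databaseName = "warehouse_db"
    )
    // BaseDestinationV1V2Migrator drives the decision: it calls
    // doesAirbyteInternalNamespaceExist(), getTableIfExists() and
    // convertToV1RawName() to work out whether a v1 raw table needs to be
    // copied into the v2 raw-table layout before the sync starts.

diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt
similarity index 66%
rename from airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt
rename to airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt
index ae214c8bbdf8..84b4dc6cb17b 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt
@@ -19,28 +19,32 @@ import org.jooq.SQLDialect
 * TyperDeduper classes.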
This implementation appeases that requirement but does not implement any * "final" table operations. */ -class RawOnlySqlGenerator(private val namingTransformer: NamingConventionTransformer) : +class RawOnlySqlGenerator(namingTransformer: NamingConventionTransformer) : JdbcSqlGenerator(namingTransformer) { - override fun getStructType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val structType: DataType<*> + get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getArrayType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val arrayType: DataType<*>? + get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getWidestType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val widestType: DataType<*>? + get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getDialect(): SQLDialect? { - throw NotImplementedError("This Destination does not support final tables") - } + override val dialect: SQLDialect? + get() { + throw NotImplementedError("This Destination does not support final tables") + } override fun extractRawDataFields( columns: LinkedHashMap, useExpensiveSaferCasting: Boolean, - ): List>? { + ): MutableList> { throw NotImplementedError("This Destination does not support final tables") } @@ -53,9 +57,9 @@ class RawOnlySqlGenerator(private val namingTransformer: NamingConventionTransfo } override fun getRowNumber( - primaryKey: List, + primaryKey: List?, cursorField: Optional, - ): Field? { + ): Field { throw NotImplementedError("This Destination does not support final tables") } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt new file mode 100644 index 000000000000..e516d50eef01 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.staging + +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.destination.StreamSyncSummary +import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnCloseFunction +import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.OnStartFunction +import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig +import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve +import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair +import io.airbyte.protocol.models.v0.DestinationSyncMode +import io.airbyte.protocol.models.v0.StreamDescriptor +import io.github.oshai.kotlinlogging.KotlinLogging +import java.util.* + +private val log = KotlinLogging.logger {} +/** Functions and logic common to all flushing strategies. */ +object GeneralStagingFunctions { + // using a random string here as a placeholder for the moment. 
+    // This avoids mixing data in the staging area between different syncs (especially if they
+    // manipulate streams with similar names).
+    // If we replaced the random connection id with the actual connection_id, we would gain the
+    // opportunity to leverage data that was uploaded to the stage in a previous attempt but
+    // failed to load to the warehouse for some reason (e.g. an interruption).
+    // This would also allow other programs/scripts to load (or reload backups?) into the
+    // connection's staging area, to be loaded at the next sync.
+    val RANDOM_CONNECTION_ID: UUID = UUID.randomUUID()
+
+    fun onStartFunction(
+        database: JdbcDatabase,
+        stagingOperations: StagingOperations,
+        writeConfigs: List<WriteConfig>,
+        typerDeduper: TyperDeduper
+    ): OnStartFunction {
+        return OnStartFunction {
+            log.info(
+                "Preparing raw tables in destination started for {} streams",
+                writeConfigs.size
+            )
+            typerDeduper.prepareSchemasAndRunMigrations()
+
+            // Create raw tables
+            val queryList: MutableList<String> = ArrayList()
+            for (writeConfig in writeConfigs) {
+                val schema = writeConfig.outputSchemaName
+                val stream = writeConfig.streamName
+                val dstTableName = writeConfig.outputTableName
+                val stageName = stagingOperations.getStageName(schema, dstTableName)
+                val stagingPath =
+                    stagingOperations.getStagingPath(
+                        SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID,
+                        schema,
+                        stream,
+                        writeConfig.outputTableName,
+                        writeConfig.writeDatetime
+                    )
+
+                log.info(
+                    "Preparing staging area in destination started for schema {} stream {}: target table: {}, stage: {}",
+                    schema,
+                    stream,
+                    dstTableName,
+                    stagingPath
+                )
+
+                stagingOperations.createSchemaIfNotExists(database, schema)
+                stagingOperations.createTableIfNotExists(database, schema, dstTableName)
+                stagingOperations.createStageIfNotExists(database, stageName)
+
+                when (writeConfig.syncMode) {
+                    DestinationSyncMode.OVERWRITE ->
+                        queryList.add(
+                            stagingOperations.truncateTableQuery(database, schema, dstTableName)
+                        )
+                    DestinationSyncMode.APPEND,
+                    DestinationSyncMode.APPEND_DEDUP -> {}
+                    else ->
+                        throw IllegalStateException(
+                            "Unrecognized sync mode: " + writeConfig.syncMode
+                        )
+                }
+                log.info(
+                    "Preparing staging area in destination completed for schema {} stream {}",
+                    schema,
+                    stream
+                )
+            }
+
+            typerDeduper.prepareFinalTables()
+
+            log.info("Executing finalization of tables.")
+            stagingOperations.executeTransaction(database, queryList)
+        }
+    }
+
+    /**
+     * Handles copying data from the staging area to the destination table, and cleaning up staged
+     * files if the upload was unsuccessful.
+     */
+    @Throws(Exception::class)
+    fun copyIntoTableFromStage(
+        database: JdbcDatabase?,
+        stageName: String?,
+        stagingPath: String?,
+        stagedFiles: List<String>?,
+        tableName: String?,
+        schemaName: String?,
+        stagingOperations: StagingOperations,
+        streamNamespace: String?,
+        streamName: String?,
+        typerDeduperValve: TypeAndDedupeOperationValve,
+        typerDeduper: TyperDeduper
+    ) {
+        try {
+            val rawTableInsertLock =
+                typerDeduper.getRawTableInsertLock(streamNamespace!!, streamName!!)
+            rawTableInsertLock.lock()
+            try {
+                stagingOperations.copyIntoTableFromStage(
+                    database,
+                    stageName,
+                    stagingPath,
+                    stagedFiles,
+                    tableName,
+                    schemaName
+                )
+            } finally {
+                rawTableInsertLock.unlock()
+            }
+
+            val streamId = AirbyteStreamNameNamespacePair(streamName, streamNamespace)
+            typerDeduperValve.addStreamIfAbsent(streamId)
+            if (typerDeduperValve.readyToTypeAndDedupe(streamId)) {
+                typerDeduper.typeAndDedupe(streamId.namespace, streamId.name, false)
+                typerDeduperValve.updateTimeAndIncreaseInterval(streamId)
+            }
+        } catch (e: Exception) {
+            throw RuntimeException("Failed to upload data from stage $stagingPath", e)
+        }
+    }
+
+    /**
+     * Tear-down process; will attempt to clean out any staging area.
+     *
+     * @param database database used for syncing
+     * @param stagingOperations collection of SQL queries necessary for writing data into a staging
+     * area
+     * @param writeConfigs configuration settings for all destination connectors needed to write
+     * @param purgeStagingData drop staging area if true, keep otherwise
+     * @return an [OnCloseFunction] that finalizes the destination tables and cleans up staging
+     */
+    fun onCloseFunction(
+        database: JdbcDatabase?,
+        stagingOperations: StagingOperations,
+        writeConfigs: List<WriteConfig>,
+        purgeStagingData: Boolean,
+        typerDeduper: TyperDeduper
+    ): OnCloseFunction {
+        return OnCloseFunction {
+            hasFailed: Boolean,
+            streamSyncSummaries: Map<StreamDescriptor, StreamSyncSummary> ->
+            // After moving data from the staging area to the target table (airbyte_raw), clean
+            // up the staging area (if the user configured it).
+            log.info("Cleaning up destination started for {} streams", writeConfigs.size)
+            typerDeduper.typeAndDedupe(streamSyncSummaries)
+            for (writeConfig in writeConfigs) {
+                val schemaName = writeConfig.outputSchemaName
+                if (purgeStagingData) {
+                    val stageName =
+                        stagingOperations.getStageName(schemaName, writeConfig.outputTableName)
+                    val stagePath =
+                        stagingOperations.getStagingPath(
+                            RANDOM_CONNECTION_ID,
+                            schemaName,
+                            writeConfig.streamName,
+                            writeConfig.outputTableName,
+                            writeConfig.writeDatetime
+                        )
+                    log.info(
+                        "Cleaning stage in destination started for stream {}. schema {}, stage: {}",
+                        writeConfig.streamName,
+                        schemaName,
+                        stagePath
+                    )
+                    // TODO: This is another weird manifestation of Redshift vs Snowflake using
+                    // either-or variables from stageName/stagingPath.
+                    stagingOperations.dropStageIfExists(database, stageName, stagePath)
+                }
+            }
+            typerDeduper.commitFinalTables()
+            typerDeduper.cleanup()
+            log.info("Cleaning up destination completed.")
+        }
+    }
+}
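
Taken together, GeneralStagingFunctions defines the bracketing steps of a staging sync. A hedged composition sketch, assuming the same imports as above; `database`, `stagingOperations`, `writeConfigs` and `typerDeduper` are placeholders already in scope at the call site:

    // Hedged sketch: how a staging destination composes these helpers. The
    // BufferedStreamConsumer invokes onStart before the first record and
    // onClose after the last; argument values here are placeholders.
    val onStart = GeneralStagingFunctions.onStartFunction(
        database, stagingOperations, writeConfigs, typerDeduper
    )
    val onClose = GeneralStagingFunctions.onCloseFunction(
        database, stagingOperations, writeConfigs, /* purgeStagingData = */ true, typerDeduper
    )
    // In between, each buffer flush uploads a file to the stage and calls
    // copyIntoTableFromStage(), which also triggers incremental type-and-dedupe
    // once the TypeAndDedupeOperationValve says a stream is due.

diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt
new file mode 100644
index 000000000000..335cc1fa004d
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.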
+ */
+package io.airbyte.cdk.integrations.destination.staging
+
+import com.google.common.annotations.VisibleForTesting
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig
+import io.airbyte.cdk.integrations.destination.record_buffer.FlushBufferFunction
+import io.airbyte.cdk.integrations.destination.record_buffer.SerializableBuffer
+import io.airbyte.commons.exceptions.ConfigErrorException
+import io.airbyte.commons.json.Jsons
+import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve
+import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper
+import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
+import io.github.oshai.kotlinlogging.KotlinLogging
+import java.util.stream.Collectors
+import org.apache.commons.io.FileUtils
+
+private val log = KotlinLogging.logger {}
+
+/**
+ * Serial flushing logic. Though simpler, this causes unnecessary backpressure and slows down the
+ * entire pipeline.
+ *
+ * Note: This class should be re-written so that it implements the [FlushBufferFunction] interface
+ * directly, instead of returning an anonymous function implementing that interface, for clarity.
+ * As of this writing, we avoid doing so to simplify the migration to async flushing.
+ */
+object SerialFlush {
+    /**
+     * Logic handling how destinations with staging areas (aka bucket storages) will flush their
+     * buffer.
+     *
+     * @param database database used for syncing
+     * @param stagingOperations collection of SQL queries necessary for writing data into a staging
+     * area
+     * @param writeConfigs configuration settings for all destination connectors needed to write
+     * @param catalog collection of configured streams (e.g. API endpoints or database tables)
+     * @return a [FlushBufferFunction] that uploads a buffer to the stage and copies it into the
+     * raw table
+     */
+    @VisibleForTesting
+    fun function(
+        database: JdbcDatabase?,
+        stagingOperations: StagingOperations,
+        writeConfigs: List<WriteConfig>,
+        catalog: ConfiguredAirbyteCatalog,
+        typerDeduperValve: TypeAndDedupeOperationValve,
+        typerDeduper: TyperDeduper
+    ): FlushBufferFunction {
+        // TODO: (ryankfu) move this block of code that executes before the lambda to
+        // #onStartFunction
+        val conflictingStreams: MutableSet<WriteConfig> = HashSet()
+        val pairToWriteConfig: MutableMap<AirbyteStreamNameNamespacePair, WriteConfig> = HashMap()
+        for (config in writeConfigs) {
+            val streamIdentifier = toNameNamespacePair(config)
+            if (pairToWriteConfig.containsKey(streamIdentifier)) {
+                conflictingStreams.add(config)
+                val existingConfig = pairToWriteConfig.getValue(streamIdentifier)
+                // The first conflicting stream won't have any problems, so we need to explicitly
+                // add it here.
+                conflictingStreams.add(existingConfig)
+            } else {
+                pairToWriteConfig[streamIdentifier] = config
+            }
+        }
+        if (!conflictingStreams.isEmpty()) {
+            val message =
+                String.format(
+                    "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: %s",
+                    conflictingStreams
+                        .stream()
+                        .map { config: WriteConfig -> config.namespace + "."
+ config.streamName } + .collect(Collectors.joining(", ")) + ) + throw ConfigErrorException(message) + } + return FlushBufferFunction { + pair: AirbyteStreamNameNamespacePair, + writer: SerializableBuffer -> + log.info( + "Flushing buffer for stream {} ({}) to staging", + pair.name, + FileUtils.byteCountToDisplaySize(writer.byteCount) + ) + require(pairToWriteConfig.containsKey(pair)) { + String.format( + "Message contained record from a stream that was not in the catalog. \ncatalog: %s", + Jsons.serialize(catalog) + ) + } + + val writeConfig = pairToWriteConfig.getValue(pair) + val schemaName = writeConfig.outputSchemaName + val stageName = stagingOperations.getStageName(schemaName, writeConfig.outputTableName) + val stagingPath = + stagingOperations.getStagingPath( + SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID, + schemaName, + writeConfig.streamName, + writeConfig.outputTableName, + writeConfig.writeDatetime + ) + try { + writer.use { + writer.flush() + val stagedFile = + stagingOperations.uploadRecordsToStage( + database, + writer, + schemaName, + stageName, + stagingPath + ) + GeneralStagingFunctions.copyIntoTableFromStage( + database, + stageName, + stagingPath, + java.util.List.of(stagedFile), + writeConfig.outputTableName, + schemaName, + stagingOperations, + writeConfig.namespace, + writeConfig.streamName, + typerDeduperValve, + typerDeduper + ) + } + } catch (e: Exception) { + log.error("Failed to flush and commit buffer data into destination's raw table", e) + throw RuntimeException( + "Failed to upload buffer to stage and commit to destination", + e + ) + } + } + } + + private fun toNameNamespacePair(config: WriteConfig): AirbyteStreamNameNamespacePair { + return AirbyteStreamNameNamespacePair(config.streamName, config.namespace) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt new file mode 100644 index 000000000000..ac19faf27523 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+package io.airbyte.cdk.integrations.destination.staging
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.google.common.base.Preconditions
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer
+import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
+import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer
+import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig
+import io.airbyte.cdk.integrations.destination.record_buffer.BufferCreateFunction
+import io.airbyte.cdk.integrations.destination.record_buffer.SerializedBufferingStrategy
+import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog
+import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve
+import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper
+import io.airbyte.protocol.models.v0.AirbyteMessage
+import io.airbyte.protocol.models.v0.AirbyteStream
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
+import java.time.Instant
+import java.util.*
+import java.util.function.Consumer
+import java.util.function.Function
+import java.util.stream.Collectors
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+/**
+ * Uses both the Factory and Consumer design patterns to create a single point of creation for
+ * consuming [AirbyteMessage]s for processing.
+ */
+open class SerialStagingConsumerFactory {
+    fun create(
+        outputRecordCollector: Consumer<AirbyteMessage>,
+        database: JdbcDatabase,
+        stagingOperations: StagingOperations,
+        namingResolver: NamingConventionTransformer,
+        onCreateBuffer: BufferCreateFunction,
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        purgeStagingData: Boolean,
+        typerDeduperValve: TypeAndDedupeOperationValve,
+        typerDeduper: TyperDeduper,
+        parsedCatalog: ParsedCatalog,
+        defaultNamespace: String?,
+        useDestinationsV2Columns: Boolean
+    ): AirbyteMessageConsumer {
+        val writeConfigs =
+            createWriteConfigs(
+                namingResolver,
+                config,
+                catalog,
+                parsedCatalog,
+                useDestinationsV2Columns
+            )
+        return BufferedStreamConsumer(
+            outputRecordCollector,
+            GeneralStagingFunctions.onStartFunction(
+                database,
+                stagingOperations,
+                writeConfigs,
+                typerDeduper
+            ),
+            SerializedBufferingStrategy(
+                onCreateBuffer,
+                catalog,
+                SerialFlush.function(
+                    database,
+                    stagingOperations,
+                    writeConfigs,
+                    catalog,
+                    typerDeduperValve,
+                    typerDeduper
+                )
+            ),
+            GeneralStagingFunctions.onCloseFunction(
+                database,
+                stagingOperations,
+                writeConfigs,
+                purgeStagingData,
+                typerDeduper
+            ),
+            catalog,
+            { data: JsonNode? -> stagingOperations.isValidData(data) },
+            defaultNamespace
+        )
+    }
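
A hedged usage sketch of create(): every argument below is a placeholder for a value the concrete destination already has in scope when building its consumer, not a value taken from this change:

    // Illustrative only; all arguments are assumed to exist at the call site.
    val messageConsumer: AirbyteMessageConsumer = SerialStagingConsumerFactory().create(
        outputRecordCollector = outputRecordCollector,
        database = database,
        stagingOperations = stagingOperations,
        namingResolver = StandardNameTransformer(),
        onCreateBuffer = onCreateBuffer,
        config = config,
        catalog = catalog,
        purgeStagingData = true,
        typerDeduperValve = typerDeduperValve,
        typerDeduper = typerDeduper,
        parsedCatalog = parsedCatalog,
        defaultNamespace = null,
        useDestinationsV2Columns = true
    )

+
+    companion object {
+        private val LOGGER: Logger =
+            LoggerFactory.getLogger(SerialStagingConsumerFactory::class.java)
+
+        // Using a random string here as a placeholder for the moment. This avoids mixing data in
+        // the staging area between different syncs (especially if they manipulate streams with
+        // similar names). If we replaced the random connection id with the actual connection_id,
+        // we would gain the opportunity to leverage data that was uploaded to the stage in a
+        // previous attempt but failed to load to the warehouse for some reason (e.g. an
+        // interruption). This would also allow other programs/scripts to load (or reload
+        // backups?) into the connection's staging area, to be loaded at the next sync.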
+ private val SYNC_DATETIME: Instant = Instant.now() + val RANDOM_CONNECTION_ID: UUID = UUID.randomUUID() + + /** + * Creates a list of all [WriteConfig] for each stream within a [ConfiguredAirbyteCatalog]. + * Each write config represents the configuration settings for writing to a destination + * connector + * + * @param namingResolver [NamingConventionTransformer] used to transform names that are + * acceptable by each destination connector + * @param config destination connector configuration parameters + * @param catalog [ConfiguredAirbyteCatalog] collection of configured + * [ConfiguredAirbyteStream] + * @return list of all write configs for each stream in a [ConfiguredAirbyteCatalog] + */ + private fun createWriteConfigs( + namingResolver: NamingConventionTransformer, + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + parsedCatalog: ParsedCatalog, + useDestinationsV2Columns: Boolean + ): List { + return catalog.streams + .stream() + .map(toWriteConfig(namingResolver, config, parsedCatalog, useDestinationsV2Columns)) + .collect(Collectors.toList()) + } + + private fun toWriteConfig( + namingResolver: NamingConventionTransformer, + config: JsonNode, + parsedCatalog: ParsedCatalog, + useDestinationsV2Columns: Boolean + ): Function { + return Function { stream: ConfiguredAirbyteStream -> + Preconditions.checkNotNull( + stream.destinationSyncMode, + "Undefined destination sync mode" + ) + val abStream = stream.stream + val streamName = abStream.name + + val outputSchema: String + val tableName: String? + if (useDestinationsV2Columns) { + val streamId = parsedCatalog.getStream(abStream.namespace, streamName).id + outputSchema = streamId.rawNamespace!! + tableName = streamId.rawName + } else { + outputSchema = + getOutputSchema(abStream, config["schema"].asText(), namingResolver) + tableName = namingResolver.getRawTableName(streamName) + } + val tmpTableName = namingResolver.getTmpTableName(streamName) + val syncMode = stream.destinationSyncMode + + val writeConfig = + WriteConfig( + streamName, + abStream.namespace, + outputSchema, + tmpTableName, + tableName, + syncMode, + SYNC_DATETIME + ) + LOGGER.info("Write config: {}", writeConfig) + writeConfig + } + } + + private fun getOutputSchema( + stream: AirbyteStream, + defaultDestSchema: String, + namingResolver: NamingConventionTransformer + ): String { + return if (stream.namespace != null) namingResolver.getNamespace(stream.namespace) + else namingResolver.getNamespace(defaultDestSchema) + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.java deleted file mode 100644 index df4b4511c4be..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.destination.StandardNameTransformer; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; -import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler; -import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator; -import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration; -import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState.Impl; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.junit.jupiter.api.Test; - -public class AbstractJdbcDestinationTest { - - private JsonNode buildConfigNoJdbcParameters() { - return Jsons.jsonNode(ImmutableMap.of( - JdbcUtils.HOST_KEY, "localhost", - JdbcUtils.PORT_KEY, 1337, - JdbcUtils.USERNAME_KEY, "user", - JdbcUtils.DATABASE_KEY, "db")); - } - - private JsonNode buildConfigWithExtraJdbcParameters(final String extraParam) { - return Jsons.jsonNode(ImmutableMap.of( - JdbcUtils.HOST_KEY, "localhost", - JdbcUtils.PORT_KEY, 1337, - JdbcUtils.USERNAME_KEY, "user", - JdbcUtils.DATABASE_KEY, "db", - JdbcUtils.JDBC_URL_PARAMS_KEY, extraParam)); - } - - @Test - void testNoExtraParamsNoDefault() { - final Map connectionProperties = new TestJdbcDestination().getConnectionProperties(buildConfigNoJdbcParameters()); - - final Map expectedProperties = ImmutableMap.of(); - assertEquals(expectedProperties, connectionProperties); - } - - @Test - void testNoExtraParamsWithDefault() { - final Map defaultProperties = ImmutableMap.of("A_PARAMETER", "A_VALUE"); - - final Map connectionProperties = new TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigNoJdbcParameters()); - - assertEquals(defaultProperties, connectionProperties); - } - - @Test - void testExtraParamNoDefault() { - final String extraParam = "key1=value1&key2=value2&key3=value3"; - final Map connectionProperties = new TestJdbcDestination().getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)); - final Map expectedProperties = ImmutableMap.of( - "key1", "value1", - "key2", "value2", - "key3", "value3"); - assertEquals(expectedProperties, connectionProperties); - } - - @Test - void testExtraParamWithDefault() { - final Map defaultProperties = ImmutableMap.of("A_PARAMETER", "A_VALUE"); - final String extraParam = "key1=value1&key2=value2&key3=value3"; - final Map connectionProperties = new TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)); - final Map expectedProperties = ImmutableMap.of( - "A_PARAMETER", "A_VALUE", - "key1", "value1", - "key2", "value2", - "key3", "value3"); - assertEquals(expectedProperties, connectionProperties); - } - - @Test - void testExtraParameterEqualToDefault() { - final Map defaultProperties = ImmutableMap.of("key1", "value1"); - final String extraParam = 
"key1=value1&key2=value2&key3=value3"; - final Map connectionProperties = new TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)); - final Map expectedProperties = ImmutableMap.of( - "key1", "value1", - "key2", "value2", - "key3", "value3"); - assertEquals(expectedProperties, connectionProperties); - } - - @Test - void testExtraParameterDiffersFromDefault() { - final Map defaultProperties = ImmutableMap.of("key1", "value0"); - final String extraParam = "key1=value1&key2=value2&key3=value3"; - - assertThrows(IllegalArgumentException.class, () -> new TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam))); - } - - @Test - void testInvalidExtraParam() { - final String extraParam = "key1=value1&sdf&"; - assertThrows(ConfigErrorException.class, - () -> new TestJdbcDestination().getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam))); - } - - static class TestJdbcDestination extends AbstractJdbcDestination { - - private final Map defaultProperties; - - public TestJdbcDestination() { - this(new HashMap<>()); - } - - public TestJdbcDestination(final Map defaultProperties) { - super("", new StandardNameTransformer(), new TestJdbcSqlOperations()); - this.defaultProperties = defaultProperties; - } - - @Override - protected Map getDefaultConnectionProperties(final JsonNode config) { - return defaultProperties; - } - - @Override - public JsonNode toJdbcConfig(final JsonNode config) { - return config; - } - - @Override - protected JdbcSqlGenerator getSqlGenerator() { - // TODO do we need to populate this? - return null; - } - - @Override - protected JdbcDestinationHandler getDestinationHandler(String databaseName, JdbcDatabase database, String rawTableSchema) { - return null; - } - - @Override - protected List> getMigrations(JdbcDatabase database, - String databaseName, - SqlGenerator sqlGenerator, - DestinationHandler destinationHandler) { - return Collections.emptyList(); - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.java deleted file mode 100644 index 6157847f7ccf..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc; - -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage; -import java.sql.SQLException; -import java.util.List; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; - -public class TestJdbcSqlOperations extends JdbcSqlOperations { - - @Override - public void insertRecordsInternal(final JdbcDatabase database, - final List records, - final String schemaName, - final String tableName) - throws Exception { - // Not required for the testing - } - - @Override - protected void insertRecordsInternalV2(final JdbcDatabase database, - final List records, - final String schemaName, - final String tableName) - throws Exception { - // Not required for the testing - } - - @Test - public void testCreateSchemaIfNotExists() { - final JdbcDatabase db = Mockito.mock(JdbcDatabase.class); - final var schemaName = "foo"; - try { - Mockito.doThrow(new SQLException("TEST")).when(db).execute(Mockito.anyString()); - } catch (final Exception e) { - // This would not be expected, but the `execute` method above will flag as an unhandled exception - assert false; - } - final SQLException exception = Assertions.assertThrows(SQLException.class, () -> createSchemaIfNotExists(db, schemaName)); - Assertions.assertEquals(exception.getMessage(), "TEST"); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.java deleted file mode 100644 index e8ea8f8e12c8..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.jdbc.copy; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.integrations.base.Destination; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import java.util.Map; -import java.util.function.Consumer; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class SwitchingDestinationTest { - - enum SwitchingEnum { - INSERT, - COPY - } - - private Destination insertDestination; - private Destination copyDestination; - private Map destinationMap; - - @BeforeEach - public void setUp() { - insertDestination = mock(Destination.class); - copyDestination = mock(Destination.class); - destinationMap = ImmutableMap.of( - SwitchingEnum.INSERT, insertDestination, - SwitchingEnum.COPY, copyDestination); - } - - @Test - public void testInsert() throws Exception { - final var switchingDestination = new SwitchingDestination<>(SwitchingEnum.class, c -> SwitchingEnum.INSERT, destinationMap); - - switchingDestination.getConsumer(mock(JsonNode.class), mock(ConfiguredAirbyteCatalog.class), mock(Consumer.class)); - - verify(insertDestination, times(1)).getConsumer(any(), any(), any()); - verify(copyDestination, times(0)).getConsumer(any(), any(), any()); - - switchingDestination.check(mock(JsonNode.class)); - - verify(insertDestination, times(1)).check(any()); - verify(copyDestination, times(0)).check(any()); - } - - @Test - public void testCopy() throws Exception { - final var switchingDestination = new SwitchingDestination<>(SwitchingEnum.class, c -> SwitchingEnum.COPY, destinationMap); - - switchingDestination.getConsumer(mock(JsonNode.class), mock(ConfiguredAirbyteCatalog.class), mock(Consumer.class)); - - verify(insertDestination, times(0)).getConsumer(any(), any(), any()); - verify(copyDestination, times(1)).getConsumer(any(), any(), any()); - - switchingDestination.check(mock(JsonNode.class)); - - verify(insertDestination, times(0)).check(any()); - verify(copyDestination, times(1)).check(any()); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.java deleted file mode 100644 index 4255c9be9884..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.destination.staging; - -import static org.junit.jupiter.api.Assertions.*; - -import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig; -import io.airbyte.commons.exceptions.ConfigErrorException; -import java.util.List; -import org.junit.jupiter.api.Test; - -class SerialStagingConsumerFactoryTest { - - @Test() - void detectConflictingStreams() { - final ConfigErrorException configErrorException = assertThrows( - ConfigErrorException.class, - () -> SerialFlush.function( - null, - null, - List.of( - new WriteConfig("example_stream", "source_schema", "destination_default_schema", null, null, null), - new WriteConfig("example_stream", "source_schema", "destination_default_schema", null, null, null)), - null, - null, - null)); - - assertEquals( - "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using ${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: source_schema.example_stream, source_schema.example_stream", - configErrorException.getMessage()); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.java deleted file mode 100644 index 59faa94eefaa..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import static org.junit.jupiter.api.Assertions.*; - -import java.time.Instant; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.time.temporal.ChronoUnit; -import org.junit.jupiter.api.Test; - -class TestingNamespacesTest { - - private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd"); - - @Test - void testGenerate() { - final String[] namespace = TestingNamespaces.generate().split("_"); - assertEquals("test", namespace[0]); - assertEquals(FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), namespace[1]); - assertFalse(namespace[2].isBlank()); - } - - @Test - void testGenerateWithPrefix() { - final String[] namespace = TestingNamespaces.generate("myprefix").split("_"); - assertEquals("myprefix", namespace[0]); - assertEquals("test", namespace[1]); - assertEquals(FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), namespace[2]); - assertFalse(namespace[3].isBlank()); - } - - @Test - void testIsOlderThan2Days() { - assertFalse(TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(0) + "_12345")); - assertTrue(TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(2) + "_12345")); - } - - @Test - void doesNotFailOnNonConventionalNames() { - assertFalse(TestingNamespaces.isOlderThan2Days("12345")); - assertFalse(TestingNamespaces.isOlderThan2Days("test_12345")); - assertFalse(TestingNamespaces.isOlderThan2Days("hello_test_12345")); - assertFalse(TestingNamespaces.isOlderThan2Days("myprefix_test1_" + getDate(2) + "_12345")); - - } - - private static String getDate(final int daysAgo) { - return FORMATTER.format(Instant.now().minus(daysAgo, ChronoUnit.DAYS).atZone(ZoneId.of("UTC")).toLocalDate()); - } - 
-} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt new file mode 100644 index 000000000000..c99b55685a63 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc + +import com.fasterxml.jackson.databind.JsonNode +import com.google.common.collect.ImmutableMap +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.db.jdbc.JdbcUtils +import io.airbyte.cdk.integrations.destination.StandardNameTransformer +import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler +import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator +import io.airbyte.commons.exceptions.ConfigErrorException +import io.airbyte.commons.json.Jsons +import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler +import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator +import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration +import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test +import org.mockito.Mockito.mock + +class AbstractJdbcDestinationTest { + private fun buildConfigNoJdbcParameters(): JsonNode { + return Jsons.jsonNode( + ImmutableMap.of( + JdbcUtils.HOST_KEY, + "localhost", + JdbcUtils.PORT_KEY, + 1337, + JdbcUtils.USERNAME_KEY, + "user", + JdbcUtils.DATABASE_KEY, + "db" + ) + ) + } + + private fun buildConfigWithExtraJdbcParameters(extraParam: String): JsonNode { + return Jsons.jsonNode( + ImmutableMap.of( + JdbcUtils.HOST_KEY, + "localhost", + JdbcUtils.PORT_KEY, + 1337, + JdbcUtils.USERNAME_KEY, + "user", + JdbcUtils.DATABASE_KEY, + "db", + JdbcUtils.JDBC_URL_PARAMS_KEY, + extraParam + ) + ) + } + + @Test + fun testNoExtraParamsNoDefault() { + val connectionProperties = + TestJdbcDestination().getConnectionProperties(buildConfigNoJdbcParameters()) + + val expectedProperties: Map = ImmutableMap.of() + Assertions.assertEquals(expectedProperties, connectionProperties) + } + + @Test + fun testNoExtraParamsWithDefault() { + val defaultProperties: Map = ImmutableMap.of("A_PARAMETER", "A_VALUE") + + val connectionProperties = + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigNoJdbcParameters()) + + Assertions.assertEquals(defaultProperties, connectionProperties) + } + + @Test + fun testExtraParamNoDefault() { + val extraParam = "key1=value1&key2=value2&key3=value3" + val connectionProperties = + TestJdbcDestination() + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value3") + Assertions.assertEquals(expectedProperties, connectionProperties) + } + + @Test + fun testExtraParamWithDefault() { + val defaultProperties: Map = ImmutableMap.of("A_PARAMETER", "A_VALUE") + val extraParam = "key1=value1&key2=value2&key3=value3" + val connectionProperties = + TestJdbcDestination(defaultProperties) + 
.getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of( + "A_PARAMETER", + "A_VALUE", + "key1", + "value1", + "key2", + "value2", + "key3", + "value3" + ) + Assertions.assertEquals(expectedProperties, connectionProperties) + } + + @Test + fun testExtraParameterEqualToDefault() { + val defaultProperties: Map = ImmutableMap.of("key1", "value1") + val extraParam = "key1=value1&key2=value2&key3=value3" + val connectionProperties = + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value3") + Assertions.assertEquals(expectedProperties, connectionProperties) + } + + @Test + fun testExtraParameterDiffersFromDefault() { + val defaultProperties: Map = ImmutableMap.of("key1", "value0") + val extraParam = "key1=value1&key2=value2&key3=value3" + + Assertions.assertThrows(IllegalArgumentException::class.java) { + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + } + } + + @Test + fun testInvalidExtraParam() { + val extraParam = "key1=value1&sdf&" + Assertions.assertThrows(ConfigErrorException::class.java) { + TestJdbcDestination() + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + } + } + + internal class TestJdbcDestination + @JvmOverloads + constructor(private val defaultProperties: Map = HashMap()) : + AbstractJdbcDestination( + "", + StandardNameTransformer(), + TestJdbcSqlOperations() + ) { + override fun getDefaultConnectionProperties(config: JsonNode): Map { + return defaultProperties + } + + override fun toJdbcConfig(config: JsonNode): JsonNode { + return config + } + + override val sqlGenerator: JdbcSqlGenerator = mock() + + override fun getDestinationHandler( + databaseName: String, + database: JdbcDatabase, + rawTableSchema: String + ): JdbcDestinationHandler { + return mock() + } + + override fun getMigrations( + database: JdbcDatabase, + databaseName: String, + sqlGenerator: SqlGenerator, + destinationHandler: DestinationHandler + ): List> { + return emptyList() + } + + public override fun getConnectionProperties(config: JsonNode): Map = + super.getConnectionProperties(config) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt new file mode 100644 index 000000000000..0f188975a419 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt new file mode 100644 index 000000000000..0f188975a419 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.commons.json.Jsons +import java.util.function.Function +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +internal class DataAdapterTest { + private val testData: JsonNode = + Jsons.deserialize( + "{\"attr1\" : \"CCC\", \"obj1\" : [{\"sub1\" : \"BBB\"}, {\"sub1\" : \"CCC\"}]}" + ) + private val replaceCCCFunction = Function<JsonNode, JsonNode> { jsonNode: JsonNode -> + if (jsonNode.isTextual) { + val textValue = jsonNode.textValue().replace("CCC".toRegex(), "FFF") + return@Function Jsons.jsonNode(textValue) + } else return@Function jsonNode + } + + @Test + fun checkSkipAll() { + val data = testData.deepCopy() + val adapter = DataAdapter({ jsonNode: JsonNode? -> false }, replaceCCCFunction) + adapter.adapt(data) + + Assertions.assertEquals(testData, data) + } + + @Test + fun checkSkip() { + val data = testData.deepCopy() + val adapter = + DataAdapter( + { jsonNode: JsonNode -> + jsonNode.isTextual && jsonNode.textValue().contains("BBB") + }, + replaceCCCFunction + ) + adapter.adapt(data) + + Assertions.assertEquals(testData, data) + } + + @Test + fun checkAdapt() { + val data = testData.deepCopy() + val adapter = + DataAdapter( + { jsonNode: JsonNode -> + jsonNode.isTextual && jsonNode.textValue().contains("CCC") + }, + replaceCCCFunction + ) + adapter.adapt(data) + println(data) + + Assertions.assertNotEquals(testData, data) + assert( + data.findValues("sub1").stream().anyMatch { jsonNode: JsonNode -> + jsonNode.isTextual && jsonNode.textValue() == "FFF" + } + ) + assert( + data.findValues("attr1").stream().anyMatch { jsonNode: JsonNode -> + jsonNode.isTextual && jsonNode.textValue() == "FFF" + } + ) + } +}
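These tests treat `DataAdapter` as a predicate plus a transform applied in place over a Jackson tree: any node the predicate matches is replaced by the transform's output, and everything else is descended into. A rough sketch of that behavior under those assumptions (ours, not the CDK's actual `DataAdapter` body):

    import com.fasterxml.jackson.databind.JsonNode
    import com.fasterxml.jackson.databind.node.ArrayNode
    import com.fasterxml.jackson.databind.node.ObjectNode

    // Recursive in-place rewrite: matching children are swapped out wholesale,
    // non-matching containers are recursed into, other nodes are left alone.
    fun adaptInPlace(
        node: JsonNode,
        matches: (JsonNode) -> Boolean,
        transform: (JsonNode) -> JsonNode
    ) {
        when (node) {
            is ObjectNode ->
                node.fieldNames().asSequence().toList().forEach { name ->
                    val child = node.get(name)
                    if (matches(child)) node.replace(name, transform(child))
                    else adaptInPlace(child, matches, transform)
                }
            is ArrayNode ->
                (0 until node.size()).forEach { i ->
                    val child = node.get(i)
                    if (matches(child)) node.set(i, transform(child))
                    else adaptInPlace(child, matches, transform)
                }
            else -> Unit
        }
    }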
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt new file mode 100644 index 000000000000..75c32ceba400 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc + +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage +import java.sql.SQLException +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test +import org.mockito.Mockito + +class TestJdbcSqlOperations : JdbcSqlOperations() { + @Throws(Exception::class) + public override fun insertRecordsInternal( + database: JdbcDatabase, + records: List<PartialAirbyteMessage>, + schemaName: String?, + tableName: String? + ) { + // Not required for the testing + } + + @Throws(Exception::class) + override fun insertRecordsInternalV2( + database: JdbcDatabase, + records: List<PartialAirbyteMessage>, + schemaName: String?, + tableName: String? + ) { + // Not required for the testing + } + + @Test + fun testCreateSchemaIfNotExists() { + val db = Mockito.mock(JdbcDatabase::class.java) + val schemaName = "foo" + try { + Mockito.doThrow(SQLException("TEST")).`when`(db).execute(Mockito.anyString()) + } catch (e: Exception) { + // Not expected: stubbing should not throw, but the mocked execute() declares a + // checked SQLException, so it is handled defensively here + assert(false) + } + val exception = + Assertions.assertThrows(SQLException::class.java) { + createSchemaIfNotExists(db, schemaName) + } + Assertions.assertEquals("TEST", exception.message) + } +}
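The key detail in the test above is that nothing in between catches the stubbed exception: `createSchemaIfNotExists` presumably issues a single DDL statement through `JdbcDatabase.execute`, so the mock's `SQLException` propagates straight into `assertThrows`. A sketch of that assumption (not necessarily the exact CDK body):

    import io.airbyte.cdk.db.jdbc.JdbcDatabase

    // Assumed shape of JdbcSqlOperations.createSchemaIfNotExists: one execute() call and
    // no try/catch, which is why Mockito.doThrow(SQLException("TEST")) reaches the test unchanged.
    @Throws(Exception::class)
    fun createSchemaIfNotExists(database: JdbcDatabase, schemaName: String?) {
        database.execute("CREATE SCHEMA IF NOT EXISTS $schemaName;")
    }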
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt new file mode 100644 index 000000000000..214d181fab2b --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.jdbc.copy + +import com.fasterxml.jackson.databind.JsonNode +import com.google.common.collect.ImmutableMap +import io.airbyte.cdk.integrations.base.Destination +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.mockito.Mockito +import org.mockito.Mockito.mock +import org.mockito.kotlin.any + +internal class SwitchingDestinationTest { + internal enum class SwitchingEnum { + INSERT, + COPY + } + + private lateinit var insertDestination: Destination + private lateinit var copyDestination: Destination + private lateinit var destinationMap: Map<SwitchingEnum, Destination> + + @BeforeEach + fun setUp() { + insertDestination = Mockito.mock(Destination::class.java) + copyDestination = Mockito.mock(Destination::class.java) + destinationMap = + ImmutableMap.of( + SwitchingEnum.INSERT, + insertDestination, + SwitchingEnum.COPY, + copyDestination + ) + } + + @Test + @Throws(Exception::class) + fun testInsert() { + val switchingDestination = + SwitchingDestination( + SwitchingEnum::class.java, + { c: JsonNode? -> SwitchingEnum.INSERT }, + destinationMap + ) + + switchingDestination.getConsumer( + Mockito.mock(JsonNode::class.java), + Mockito.mock(ConfiguredAirbyteCatalog::class.java), + mock() + ) + + Mockito.verify(insertDestination, Mockito.times(1)).getConsumer(any(), any(), any()) + Mockito.verify(copyDestination, Mockito.times(0)).getConsumer(any(), any(), any()) + + switchingDestination.check(Mockito.mock(JsonNode::class.java)) + + Mockito.verify(insertDestination, Mockito.times(1)).check(any()) + Mockito.verify(copyDestination, Mockito.times(0)).check(any()) + } + + @Test + @Throws(Exception::class) + fun testCopy() { + val switchingDestination = + SwitchingDestination( + SwitchingEnum::class.java, + { c: JsonNode? -> SwitchingEnum.COPY }, + destinationMap + ) + + switchingDestination.getConsumer( + Mockito.mock(JsonNode::class.java), + Mockito.mock(ConfiguredAirbyteCatalog::class.java), + Mockito.mock() + ) + + Mockito.verify(insertDestination, Mockito.times(0)).getConsumer(any(), any(), any()) + Mockito.verify(copyDestination, Mockito.times(1)).getConsumer(any(), any(), any()) + + switchingDestination.check(Mockito.mock(JsonNode::class.java)) + + Mockito.verify(insertDestination, Mockito.times(0)).check(any()) + Mockito.verify(copyDestination, Mockito.times(1)).check(any()) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt new file mode 100644 index 000000000000..5684804c0ba7 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.destination.staging + +import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig +import io.airbyte.commons.exceptions.ConfigErrorException +import java.util.List +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test +import org.mockito.Mockito.mock + +internal class SerialStagingConsumerFactoryTest { + @Test + fun detectConflictingStreams() { + val configErrorException = + Assertions.assertThrows(ConfigErrorException::class.java) { + SerialFlush.function( + null, + mock(), + List.of( + WriteConfig( + "example_stream", + "source_schema", + "destination_default_schema", + null, + null, + mock() + ), + WriteConfig( + "example_stream", + "source_schema", + "destination_default_schema", + null, + null, + mock() + ) + ), + mock(), + mock(), + mock() + ) + } + + Assertions.assertEquals( + "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: source_schema.example_stream, source_schema.example_stream", + configErrorException.message + ) + } +}
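The assertion above fails fast, before any data is flushed: `SerialFlush` evidently keys each `WriteConfig` by namespace plus stream name and aborts when two configs collapse onto the same table. A hedged sketch of that duplicate check, with a simplified stand-in for `WriteConfig`:

    import io.airbyte.commons.exceptions.ConfigErrorException

    // Simplified stand-in for WriteConfig: only the fields that participate in the collision.
    data class StreamId(val namespace: String, val streamName: String)

    fun failOnConflictingStreams(configs: List<StreamId>) {
        val conflicting = configs.groupingBy { it }.eachCount().filterValues { it > 1 }.keys
        if (conflicting.isNotEmpty()) {
            val affected =
                configs.filter { it in conflicting }
                    .joinToString(", ") { "${it.namespace}.${it.streamName}" }
            throw ConfigErrorException(
                "You are trying to write multiple streams to the same table. Consider switching " +
                    "to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of " +
                    "them into a separate connection with a different stream prefix. " +
                    "Affected streams: $affected"
            )
        }
    }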
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt new file mode 100644 index 000000000000..e3f882029b25 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination + +import java.time.Instant +import java.time.ZoneId +import java.time.format.DateTimeFormatter +import java.time.temporal.ChronoUnit +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +internal class TestingNamespacesTest { + @Test + fun testGenerate() { + val namespace = + TestingNamespaces.generate() + .split("_".toRegex()) + .dropLastWhile { it.isEmpty() } + .toTypedArray() + Assertions.assertEquals("test", namespace[0]) + Assertions.assertEquals( + FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), + namespace[1] + ) + Assertions.assertFalse(namespace[2].isBlank()) + } + + @Test + fun testGenerateWithPrefix() { + val namespace = + TestingNamespaces.generate("myprefix") + .split("_".toRegex()) + .dropLastWhile { it.isEmpty() } + .toTypedArray() + Assertions.assertEquals("myprefix", namespace[0]) + Assertions.assertEquals("test", namespace[1]) + Assertions.assertEquals( + FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), + namespace[2] + ) + Assertions.assertFalse(namespace[3].isBlank()) + } + + @Test + fun testIsOlderThan2Days() { + Assertions.assertFalse( + TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(0) + "_12345") + ) + Assertions.assertTrue( + TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(2) + "_12345") + ) + } + + @Test + fun doesNotFailOnNonConventionalNames() { + Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("12345")) + Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("test_12345")) + Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("hello_test_12345")) + Assertions.assertFalse( + TestingNamespaces.isOlderThan2Days("myprefix_test1_" + getDate(2) + "_12345") + ) + } + + companion object { + private val FORMATTER: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd") + + private fun getDate(daysAgo: Int): String { + return FORMATTER.format( + Instant.now() + .minus(daysAgo.toLong(), ChronoUnit.DAYS) + .atZone(ZoneId.of("UTC")) + .toLocalDate() + ) + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.java deleted file mode 100644 index 8094c8fc214c..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ /dev/null @@ -1,1860 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider.INFINITY_TYPE_MESSAGE; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider.INTEGER_TYPE_CATALOG; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider.NAN_TYPE_MESSAGE; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider.NUMBER_TYPE_CATALOG; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.prefixFileNameByVersion; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.fail; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataArgumentsProvider; -import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider; -import io.airbyte.cdk.integrations.standardtest.destination.comparator.BasicTestDataComparator; -import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.jackson.MoreMappers; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.lang.Exceptions; -import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.util.MoreIterators; -import io.airbyte.configoss.JobGetSpecConfig; -import io.airbyte.configoss.OperatorDbt; -import io.airbyte.configoss.StandardCheckConnectionInput; -import io.airbyte.configoss.StandardCheckConnectionOutput; -import io.airbyte.configoss.StandardCheckConnectionOutput.Status; -import io.airbyte.configoss.WorkerDestinationConfig; -import io.airbyte.protocol.models.Field; -import io.airbyte.protocol.models.JsonSchemaType; -import io.airbyte.protocol.models.v0.AirbyteCatalog; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.AirbyteMessage.Type; -import io.airbyte.protocol.models.v0.AirbyteRecordMessage; -import io.airbyte.protocol.models.v0.AirbyteStateMessage; -import io.airbyte.protocol.models.v0.AirbyteStream; -import io.airbyte.protocol.models.v0.CatalogHelpers; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.v0.ConnectorSpecification; -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import io.airbyte.protocol.models.v0.SyncMode; -import io.airbyte.workers.exception.TestHarnessException; -import io.airbyte.workers.general.DbtTransformationRunner; -import io.airbyte.workers.general.DefaultCheckConnectionTestHarness; -import io.airbyte.workers.general.DefaultGetSpecTestHarness; -import io.airbyte.workers.helper.ConnectorConfigUpdater; -import io.airbyte.workers.helper.EntrypointEnvChecker; -import io.airbyte.workers.internal.AirbyteDestination; -import io.airbyte.workers.internal.DefaultAirbyteDestination; -import 
io.airbyte.workers.normalization.DefaultNormalizationRunner; -import io.airbyte.workers.normalization.NormalizationRunner; -import io.airbyte.workers.process.AirbyteIntegrationLauncher; -import io.airbyte.workers.process.DockerProcessFactory; -import io.airbyte.workers.process.ProcessFactory; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Random; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import lombok.Builder; -import lombok.Getter; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtensionContext; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.ArgumentsProvider; -import org.junit.jupiter.params.provider.ArgumentsSource; -import org.mockito.Mockito; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class DestinationAcceptanceTest { - - protected HashSet TEST_SCHEMAS; - - private static final Random RANDOM = new Random(); - private static final String NORMALIZATION_VERSION = "dev"; - - private static final String JOB_ID = "0"; - private static final int JOB_ATTEMPT = 0; - - private static final String DUMMY_CATALOG_NAME = "DummyCatalog"; - - private static final Logger LOGGER = LoggerFactory.getLogger(DestinationAcceptanceTest.class); - - private TestDestinationEnv testEnv; - - private Path jobRoot; - private ProcessFactory processFactory; - private ConnectorConfigUpdater mConnectorConfigUpdater; - - protected Path localRoot; - protected TestDataComparator testDataComparator = getTestDataComparator(); - - /** - * Name of the docker image that the tests will run against. - * - * @return docker image name - */ - protected abstract String getImageName(); - - protected boolean supportsInDestinationNormalization() { - return false; - } - - protected Map inDestinationNormalizationFlags(final boolean shouldNormalize) { - if (shouldNormalize && supportsInDestinationNormalization()) { - return Map.of("NORMALIZATION_TECHNIQUE", "LEGACY"); - } - return Collections.emptyMap(); - } - - private String getImageNameWithoutTag() { - return getImageName().contains(":") ? getImageName().split(":")[0] : getImageName(); - } - - private JsonNode readMetadata() { - try { - return Jsons.jsonNodeFromFile(MoreResources.readResourceAsFile("metadata.yaml")); - } catch (IllegalArgumentException | URISyntaxException e) { - // Resource is not found. 
- return Jsons.emptyObject(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - protected String getNormalizationImageName() { - var metadata = readMetadata().get("data"); - if (metadata == null) { - return null; - } - var normalizationConfig = metadata.get("normalizationConfig"); - if (normalizationConfig == null) { - return null; - } - var normalizationRepository = normalizationConfig.get("normalizationRepository"); - if (normalizationRepository == null) { - return null; - } - return normalizationRepository.asText() + ":" + NORMALIZATION_VERSION; - } - - /** - * Configuration specific to the integration. Will be passed to integration where appropriate in - * each test. Should be valid. - * - * @return integration-specific configuration - */ - protected abstract JsonNode getConfig() throws Exception; - - /** - * Configuration specific to the integration. Will be passed to integration where appropriate in - * tests that test behavior when configuration is invalid. e.g incorrect password. Should be - * invalid. - * - * @return integration-specific configuration - */ - protected abstract JsonNode getFailCheckConfig() throws Exception; - - /** - * Function that returns all of the records in destination as json at the time this method is - * invoked. These will be used to check that the data actually written is what should actually be - * there. Note: this returns a set and does not test any order guarantees. - * - * @param testEnv - information about the test environment. - * @param streamName - name of the stream for which we are retrieving records. - * @param namespace - the destination namespace records are located in. Null if not applicable. - * Usually a JDBC schema. - * @param streamSchema - schema of the stream to be retrieved. This is only necessary for - * destinations in which data types cannot be accurately inferred (e.g. in CSV destination, - * every value is a string). - * @return All of the records in the destination at the time this method is invoked. - * @throws Exception - can throw any exception, test framework will handle. - */ - protected abstract List retrieveRecords(TestDestinationEnv testEnv, - String streamName, - String namespace, - JsonNode streamSchema) - throws Exception; - - /** - * Returns a destination's default schema. The default implementation assumes this corresponds to - * the configuration's 'schema' field, as this is how most of our destinations implement this. - * Destinations are free to appropriately override this. The return value is used to assert - * correctness. - *
- * If not applicable, Destinations are free to ignore this. - * - * @param config - integration-specific configuration returned by {@link #getConfig()}. - * @return the default schema, if applicable. - */ - protected String getDefaultSchema(final JsonNode config) throws Exception { - if (config.get("schema") == null) { - return null; - } - final String schema = config.get("schema").asText(); - TEST_SCHEMAS.add(schema); - return schema; - } - - /** - * Override to return true if a destination implements namespaces and should be tested as such. - */ - protected boolean implementsNamespaces() { - return false; - } - - /** - * Detects if a destination implements append mode from the spec.json that should include - * 'supportsIncremental' = true - * - * @return - a boolean. - */ - protected boolean implementsAppend() throws TestHarnessException { - final ConnectorSpecification spec = runSpec(); - assertNotNull(spec); - if (spec.getSupportsIncremental() != null) { - return spec.getSupportsIncremental(); - } else { - return false; - } - } - - protected boolean normalizationFromDefinition() { - var metadata = readMetadata().get("data"); - if (metadata == null) { - return false; - } - var normalizationConfig = metadata.get("normalizationConfig"); - if (normalizationConfig == null) { - return false; - } - return normalizationConfig.has("normalizationRepository") && normalizationConfig.has("normalizationTag"); - } - - protected boolean dbtFromDefinition() { - var metadata = readMetadata().get("data"); - if (metadata == null) { - return false; - } - var supportsDbt = metadata.get("supportsDbt"); - return supportsDbt != null && supportsDbt.asBoolean(false); - } - - protected String getDestinationDefinitionKey() { - return getImageNameWithoutTag(); - } - - protected String getNormalizationIntegrationType() { - var metadata = readMetadata().get("data"); - if (metadata == null) { - return null; - } - var normalizationConfig = metadata.get("normalizationConfig"); - if (normalizationConfig == null) { - return null; - } - var normalizationIntegrationType = normalizationConfig.get("normalizationIntegrationType"); - if (normalizationIntegrationType == null) { - return null; - } - return normalizationIntegrationType.asText(); - } - - /** - * Detects if a destination implements append dedup mode from the spec.json that should include - * 'supportedDestinationSyncMode' - * - * @return - a boolean. - */ - protected boolean implementsAppendDedup() throws TestHarnessException { - final ConnectorSpecification spec = runSpec(); - assertNotNull(spec); - if (spec.getSupportedDestinationSyncModes() != null) { - return spec.getSupportedDestinationSyncModes().contains(DestinationSyncMode.APPEND_DEDUP); - } else { - return false; - } - } - - /** - * Detects if a destination implements overwrite mode from the spec.json that should include - * 'supportedDestinationSyncMode' - * - * @return - a boolean. - */ - protected boolean implementsOverwrite() throws TestHarnessException { - final ConnectorSpecification spec = runSpec(); - assertNotNull(spec); - if (spec.getSupportedDestinationSyncModes() != null) { - return spec.getSupportedDestinationSyncModes().contains(DestinationSyncMode.OVERWRITE); - } else { - return false; - } - } - - /** - * Same idea as {@link #retrieveRecords(TestDestinationEnv, String, String, JsonNode)}. Except this - * method should pull records from the table that contains the normalized records and convert them - * back into the data as it would appear in an {@link AirbyteRecordMessage}.
Only need to override - * this method if {@link #normalizationFromDefinition} returns true. - * - * @param testEnv - information about the test environment. - * @param streamName - name of the stream for which we are retrieving records. - * @param namespace - the destination namespace records are located in. Null if not applicable. - * Usually a JDBC schema. - * @return All of the records in the destination at the time this method is invoked. - * @throws Exception - can throw any exception, test framework will handle. - */ - protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, - final String streamName, - final String namespace) - throws Exception { - throw new IllegalStateException("Not implemented"); - } - - /** - * Function that performs any setup of external resources required for the test. e.g. instantiate a - * postgres database. This function will be called before EACH test. - * - * @param testEnv - information about the test environment. - * @param TEST_SCHEMAS - * @throws Exception - can throw any exception, test framework will handle. - */ - protected abstract void setup(TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) throws Exception; - - /** - * Function that performs any clean up of external resources required for the test. e.g. delete a - * postgres database. This function will be called after EACH test. It MUST remove all data in the - * destination so that there is no contamination across tests. - * - * @param testEnv - information about the test environment. - * @throws Exception - can throw any exception, test framework will handle. - */ - protected abstract void tearDown(TestDestinationEnv testEnv) throws Exception; - - /** - * @deprecated This method is moved to the AdvancedTestDataComparator. Please move your destination - * implementation of the method to your comparator implementation. - */ - @Deprecated - protected List resolveIdentifier(final String identifier) { - return List.of(identifier); - } - - @BeforeEach - void setUpInternal() throws Exception { - final Path testDir = Path.of("/tmp/airbyte_tests/"); - Files.createDirectories(testDir); - final Path workspaceRoot = Files.createTempDirectory(testDir, "test"); - jobRoot = Files.createDirectories(Path.of(workspaceRoot.toString(), "job")); - localRoot = Files.createTempDirectory(testDir, "output"); - LOGGER.info("jobRoot: {}", jobRoot); - LOGGER.info("localRoot: {}", localRoot); - testEnv = new TestDestinationEnv(localRoot); - mConnectorConfigUpdater = Mockito.mock(ConnectorConfigUpdater.class); - TEST_SCHEMAS = new HashSet<>(); - setup(testEnv, TEST_SCHEMAS); - - processFactory = new DockerProcessFactory( - workspaceRoot, - workspaceRoot.toString(), - localRoot.toString(), - "host", - Collections.emptyMap()); - } - - @AfterEach - void tearDownInternal() throws Exception { - tearDown(testEnv); - } - - /** - * Verify that when the integrations returns a valid spec. - */ - @Test - public void testGetSpec() throws TestHarnessException { - assertNotNull(runSpec()); - } - - /** - * Verify that when given valid credentials, that check connection returns a success response. - * Assume that the {@link DestinationAcceptanceTest#getConfig()} is valid. - */ - @Test - public void testCheckConnection() throws Exception { - assertEquals(Status.SUCCEEDED, runCheck(getConfig()).getStatus()); - } - - /** - * Verify that when given invalid credentials, that check connection returns a failed response. - * Assume that the {@link DestinationAcceptanceTest#getFailCheckConfig()} is invalid. 
- */ - @Test - public void testCheckConnectionInvalidCredentials() throws Exception { - assertEquals(Status.FAILED, runCheck(getFailCheckConfig()).getStatus()); - } - - /** - * Verify that the integration successfully writes records. Tests a wide variety of messages and - * schemas (aspirationally, anyway). - */ - @ParameterizedTest - @ArgumentsSource(DataArgumentsProvider.class) - public void testSync(final String messagesFilename, final String catalogFilename) - throws Exception { - final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - final List messages = MoreResources.readResource(messagesFilename).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - - final JsonNode config = getConfig(); - final String defaultSchema = getDefaultSchema(config); - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false); - retrieveRawRecordsAndAssertSameMessages(catalog, messages, defaultSchema); - } - - /** - * This serves to test MSSQL 2100 limit parameters in a single query. this means that for Airbyte - * insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 params - */ - @ParameterizedTest - @ArgumentsSource(DataArgumentsProvider.class) - public void testSyncWithLargeRecordBatch(final String messagesFilename, - final String catalogFilename) - throws Exception { - final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - final List messages = MoreResources.readResource(messagesFilename).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - - final List largeNumberRecords = Collections - .nCopies(400, messages) - .stream() - .flatMap(List::stream) - // regroup messages per stream - .sorted(Comparator - .comparing(AirbyteMessage::getType) - .thenComparing( - message -> message.getType().equals(Type.RECORD) ? message.getRecord().getStream() - : message.toString())) - .collect(Collectors.toList()); - - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false); - } - - /** - * Verify that the integration overwrites the first sync with the second sync. 
- */ - @Test - public void testSecondSync() throws Exception { - if (!implementsOverwrite()) { - LOGGER.info("Destination's spec.json does not support overwrite sync mode."); - return; - } - - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - - final List firstSyncMessages = MoreResources.readResource( - DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false); - - // We need to make sure that other streams\tables\files in the same location will not be - // affected\deleted\overridden by our activities during first, second or any future sync. - // So let's create a dummy data that will be checked after all sync. It should remain the same - final AirbyteCatalog dummyCatalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - dummyCatalog.getStreams().get(0).setName(DUMMY_CATALOG_NAME); - final ConfiguredAirbyteCatalog configuredDummyCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - dummyCatalog); - // update messages to set new dummy stream name - firstSyncMessages.stream().filter(message -> message.getRecord() != null) - .forEach(message -> message.getRecord().setStream(DUMMY_CATALOG_NAME)); - // sync dummy data - runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredDummyCatalog, false); - - // Run second sync - final List secondSyncMessages = Lists.newArrayList( - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(catalog.getStreams().get(0).getName()) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 1) - .put("currency", "USD") - .put("date", "2020-03-31T00:00:00Z") - // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which - // fails destination tests - .put("HKD", 10.1) - .put("NZD", 700.1) - .build()))), - new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))); - - runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false); - final String defaultSchema = getDefaultSchema(config); - retrieveRawRecordsAndAssertSameMessages(catalog, secondSyncMessages, defaultSchema); - - // verify that other streams in the same location were not affected. If something fails here, - // then this need to be fixed in connectors logic to override only required streams - retrieveRawRecordsAndAssertSameMessages(dummyCatalog, firstSyncMessages, defaultSchema); - } - - /** - * Tests that we are able to read over special characters properly when processing line breaks in - * destinations. 
- */ - @Test - public void testLineBreakCharacters() throws Exception { - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - final JsonNode config = getConfig(); - - final List secondSyncMessages = Lists.newArrayList( - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(catalog.getStreams().get(0).getName()) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 1) - .put("currency", "USD\u2028") - .put("date", "2020-03-\n31T00:00:00Z\r") - // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which - // fails destination tests - .put("HKD", 10.1) - .put("NZD", 700.1) - .build()))), - new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))); - - runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false); - final String defaultSchema = getDefaultSchema(config); - retrieveRawRecordsAndAssertSameMessages(catalog, secondSyncMessages, defaultSchema); - } - - @Test - public void normalizationFromDefinitionValueShouldBeCorrect() { - if (normalizationFromDefinition()) { - boolean normalizationRunnerFactorySupportsDestinationImage; - try { - new DefaultNormalizationRunner( - processFactory, - getNormalizationImageName(), - getNormalizationIntegrationType()); - normalizationRunnerFactorySupportsDestinationImage = true; - } catch (final IllegalStateException e) { - normalizationRunnerFactorySupportsDestinationImage = false; - } - assertEquals(normalizationFromDefinition(), normalizationRunnerFactorySupportsDestinationImage); - } - } - - /** - * Verify that the integration successfully writes records incrementally. The second run should - * append records to the datastore instead of overwriting the previous run. 
- */ - @Test - public void testIncrementalSync() throws Exception { - if (!implementsAppend()) { - LOGGER.info("Destination's spec.json does not include '\"supportsIncremental\" ; true'"); - return; - } - - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - configuredCatalog.getStreams().forEach(s -> { - s.withSyncMode(SyncMode.INCREMENTAL); - s.withDestinationSyncMode(DestinationSyncMode.APPEND); - }); - - final List firstSyncMessages = MoreResources.readResource( - DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false); - final List secondSyncMessages = Lists.newArrayList( - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(catalog.getStreams().get(0).getName()) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 1) - .put("currency", "USD") - .put("date", "2020-03-31T00:00:00Z") - // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which - // fails destination tests - .put("HKD", 10.1) - .put("NZD", 700.1) - .build()))), - new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))); - runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false); - - final List expectedMessagesAfterSecondSync = new ArrayList<>(); - expectedMessagesAfterSecondSync.addAll(firstSyncMessages); - expectedMessagesAfterSecondSync.addAll(secondSyncMessages); - - final String defaultSchema = getDefaultSchema(config); - retrieveRawRecordsAndAssertSameMessages(catalog, expectedMessagesAfterSecondSync, - defaultSchema); - } - - @ArgumentsSource(DataArgumentsProvider.class) - @Test - public void testIncrementalSyncWithNormalizationDropOneColumn() - throws Exception { - if (!normalizationFromDefinition() || !supportIncrementalSchemaChanges()) { - return; - } - - final AirbyteCatalog catalog = Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(ProtocolVersion.V0)), - AirbyteCatalog.class); - - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - configuredCatalog.getStreams().forEach(s -> { - s.withSyncMode(SyncMode.INCREMENTAL); - s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP); - s.withCursorField(Collections.emptyList()); - // use composite primary key of various types (string, float) - s.withPrimaryKey( - List.of(List.of("id"), List.of("currency"), List.of("date"), List.of("NZD"), List.of("USD"))); - }); - - List messages = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(ProtocolVersion.V0)) - .lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true); - - final String defaultSchema = getDefaultSchema(config); - List 
actualMessages = retrieveNormalizedRecords(catalog, - defaultSchema); - assertSameMessages(messages, actualMessages, true); - - // remove one field - final JsonNode jsonSchema = configuredCatalog.getStreams().get(0).getStream().getJsonSchema(); - ((ObjectNode) jsonSchema.findValue("properties")).remove("HKD"); - // insert more messages - // NOTE: we re-read the messages because `assertSameMessages` above pruned the emittedAt timestamps. - messages = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(ProtocolVersion.V0)).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - messages.add(Jsons.deserialize( - "{\"type\": \"RECORD\", \"record\": {\"stream\": \"exchange_rate\", \"emitted_at\": 1602637989500, \"data\": { \"id\": 2, \"currency\": \"EUR\", \"date\": \"2020-09-02T00:00:00Z\", \"NZD\": 1.14, \"USD\": 10.16}}}\n", - AirbyteMessage.class)); - - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true); - - // assert the removed field is missing on the new messages - actualMessages = retrieveNormalizedRecords(catalog, defaultSchema); - - // We expect all of the messages to be missing the removed column after normalization. - final List<AirbyteMessage> expectedMessages = messages.stream().map((message) -> { - if (message.getRecord() != null) { - ((ObjectNode) message.getRecord().getData()).remove("HKD"); - } - return message; - }).collect(Collectors.toList()); - assertSameMessages(expectedMessages, actualMessages, true); - } - - /** - * Verify that the integration successfully writes records, both raw and normalized. - * Tests a wide variety of messages and schemas (aspirationally, anyway). - */ - @ParameterizedTest - @ArgumentsSource(DataArgumentsProvider.class) - public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) - throws Exception { - if (!normalizationFromDefinition()) { - return; - } - - final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - final List<AirbyteMessage> messages = MoreResources.readResource(messagesFilename).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true); - - final String defaultSchema = getDefaultSchema(config); - final List<AirbyteMessage> actualMessages = retrieveNormalizedRecords(catalog, - defaultSchema); - assertSameMessages(messages, actualMessages, true); - } - - /** - * Verify that the integration successfully writes records, both raw and normalized, and - * runs dedupe transformations. - *
- * Although this test assumes append-dedup requires normalization, and almost all our Destinations - * do so, this is not necessarily true. This explains {@link #implementsAppendDedup()}. - */ - @Test - public void testIncrementalDedupeSync() throws Exception { - if (!implementsAppendDedup()) { - LOGGER.info( - "Destination's spec.json does not include 'append_dedupe' in its '\"supportedDestinationSyncModes\"'"); - return; - } - - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - configuredCatalog.getStreams().forEach(s -> { - s.withSyncMode(SyncMode.INCREMENTAL); - s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP); - s.withCursorField(Collections.emptyList()); - // use composite primary key of various types (string, float) - s.withPrimaryKey( - List.of(List.of("id"), List.of("currency"), List.of("date"), List.of("NZD"))); - }); - - final List firstSyncMessages = MoreResources.readResource( - DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, supportsNormalization()); - - final List secondSyncMessages = Lists.newArrayList( - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(catalog.getStreams().get(0).getName()) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 2) - .put("currency", "EUR") - .put("date", "2020-09-01T00:00:00Z") - .put("HKD", 10.5) - .put("NZD", 1.14) - .build()))), - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(catalog.getStreams().get(0).getName()) - .withEmittedAt(Instant.now().toEpochMilli() + 100L) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 1) - .put("currency", "USD") - .put("date", "2020-09-01T00:00:00Z") - .put("HKD", 5.4) - .put("NZD", 1.14) - .build()))), - new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))); - runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false); - - final List expectedMessagesAfterSecondSync = new ArrayList<>(); - expectedMessagesAfterSecondSync.addAll(firstSyncMessages); - expectedMessagesAfterSecondSync.addAll(secondSyncMessages); - - final Map latestMessagesOnly = expectedMessagesAfterSecondSync - .stream() - .filter(message -> message.getType() == Type.RECORD && message.getRecord() != null) - .collect(Collectors.toMap( - message -> message.getRecord().getData().get("id").asText() + - message.getRecord().getData().get("currency").asText() + - message.getRecord().getData().get("date").asText() + - message.getRecord().getData().get("NZD").asText(), - message -> message, - // keep only latest emitted record message per primary key/cursor - (a, b) -> a.getRecord().getEmittedAt() > b.getRecord().getEmittedAt() ? 
a : b)); - // Filter expectedMessagesAfterSecondSync and keep latest messages only (keep same message order) - final List expectedMessages = expectedMessagesAfterSecondSync - .stream() - .filter(message -> message.getType() == Type.RECORD && message.getRecord() != null) - .filter(message -> { - final String key = message.getRecord().getData().get("id").asText() + - message.getRecord().getData().get("currency").asText() + - message.getRecord().getData().get("date").asText() + - message.getRecord().getData().get("NZD").asText(); - return message.getRecord().getEmittedAt() - .equals(latestMessagesOnly.get(key).getRecord().getEmittedAt()); - }).collect(Collectors.toList()); - - final String defaultSchema = getDefaultSchema(config); - retrieveRawRecordsAndAssertSameMessages(catalog, expectedMessagesAfterSecondSync, - defaultSchema); - if (normalizationFromDefinition()) { - final List actualMessages = retrieveNormalizedRecords(catalog, - defaultSchema); - assertSameMessages(expectedMessages, actualMessages, true); - } - } - - /** - * @return the max limit length allowed for values in the destination. - */ - protected int getMaxRecordValueLimit() { - return 1000000000; - } - - @Test - public void testCustomDbtTransformations() throws Exception { - if (!dbtFromDefinition()) { - return; - } - - final JsonNode config = getConfig(); - - // This may throw IllegalStateException "Requesting normalization, but it is not included in the - // normalization mappings" - // We indeed require normalization implementation of the 'transform_config' function for this - // destination, - // because we make sure to install required dbt dependency in the normalization docker image in - // order to run - // this test successfully and that we are able to convert a destination 'config.json' into a dbt - // 'profiles.yml' - // (we don't actually rely on normalization running anything else here though) - final DbtTransformationRunner runner = new DbtTransformationRunner(processFactory, - new DefaultNormalizationRunner( - processFactory, - getNormalizationImageName(), - getNormalizationIntegrationType())); - runner.start(); - final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform")); - final OperatorDbt dbtConfig = new OperatorDbt() - // Forked from https://github.com/dbt-labs/jaffle_shop because they made a change that would have - // required a dbt version upgrade - // https://github.com/dbt-labs/jaffle_shop/commit/b1680f3278437c081c735b7ea71c2ff9707bc75f#diff-27386df54b2629c1191d8342d3725ed8678413cfa13b5556f59d69d33fae5425R20 - // We're actually two commits upstream of that, because the previous commit - // (https://github.com/dbt-labs/jaffle_shop/commit/ec36ae177ab5cb79da39ff8ab068c878fbac13a0) also - // breaks something - // TODO once we're on DBT 1.x, switch this back to using the main branch - .withGitRepoUrl("https://github.com/airbytehq/jaffle_shop.git") - .withGitRepoBranch("pre_dbt_upgrade") - .withDockerImage(getNormalizationImageName()); - // - // jaffle_shop is a fictional ecommerce store maintained by fishtownanalytics/dbt. - // - // This dbt project transforms raw data from an app database into a customers and orders model ready - // for analytics. - // The repo is a self-contained playground dbt project, useful for testing out scripts, and - // communicating some of the core dbt concepts: - // - // 1. First, it tests if connection to the destination works. 
- dbtConfig.withDbtArguments("debug"); - if (!runner.run(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt debug Failed."); - } - // 2. Install any dependencies packages, if any - dbtConfig.withDbtArguments("deps"); - if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt deps Failed."); - } - // 3. It contains seeds that includes some (fake) raw data from a fictional app as CSVs data sets. - // This materializes the CSVs as tables in your target schema. - // Note that a typical dbt project does not require this step since dbt assumes your raw data is - // already in your warehouse. - dbtConfig.withDbtArguments("seed"); - if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt seed Failed."); - } - // 4. Run the models: - // Note: If this steps fails, it might mean that you need to make small changes to the SQL in the - // models folder to adjust for the flavor of SQL of your target database. - dbtConfig.withDbtArguments("run"); - if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt run Failed."); - } - // 5. Test the output of the models and tables have been properly populated: - dbtConfig.withDbtArguments("test"); - if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt test Failed."); - } - // 6. Generate dbt documentation for the project: - // This step is commented out because it takes a long time, but is not vital for Airbyte - // dbtConfig.withDbtArguments("docs generate"); - // if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - // throw new WorkerException("dbt docs generate Failed."); - // } - runner.close(); - } - - @Test - void testCustomDbtTransformationsFailure() throws Exception { - if (!normalizationFromDefinition() || !dbtFromDefinition()) { - // we require normalization implementation for this destination, because we make sure to install - // required dbt dependency in the normalization docker image in order to run this test successfully - // (we don't actually rely on normalization running anything here though) - return; - } - - final JsonNode config = getConfig(); - - final DbtTransformationRunner runner = new DbtTransformationRunner(processFactory, - new DefaultNormalizationRunner( - processFactory, - getNormalizationImageName(), - getNormalizationIntegrationType())); - runner.start(); - final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform")); - final OperatorDbt dbtConfig = new OperatorDbt() - .withGitRepoUrl("https://github.com/fishtown-analytics/dbt-learn-demo.git") - .withGitRepoBranch("main") - .withDockerImage("fishtownanalytics/dbt:0.19.1") - .withDbtArguments("debug"); - if (!runner.run(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { - throw new TestHarnessException("dbt debug Failed."); - } - - dbtConfig.withDbtArguments("test"); - assertFalse(runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig), - "dbt test should fail, as we haven't run dbt run on this project yet"); - } - - /** - * Verify the destination uses the namespace field if it is set. 
- */ - @Test - void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { - if (!implementsNamespaces()) { - return; - } - - // TODO(davin): make these tests part of the catalog file. - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - // A unique namespace is required to avoid test isolation problems. - final String namespace = TestingNamespaces.generate("source_namespace"); - TEST_SCHEMAS.add(namespace); - - catalog.getStreams().forEach(stream -> stream.setNamespace(namespace)); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - - final List messages = MoreResources.readResource( - DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final List messagesWithNewNamespace = getRecordMessagesWithNewNamespace( - messages, namespace); - - final JsonNode config = getConfig(); - final String defaultSchema = getDefaultSchema(config); - runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false); - retrieveRawRecordsAndAssertSameMessages(catalog, messagesWithNewNamespace, defaultSchema); - } - - /** - * Verify a destination is able to write tables with the same name to different namespaces. - */ - @Test - void testSyncWriteSameTableNameDifferentNamespace() throws Exception { - if (!implementsNamespaces()) { - return; - } - - // TODO(davin): make these tests part of the catalog file. - final var catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final var namespace1 = TestingNamespaces.generate("source_namespace"); - TEST_SCHEMAS.add(namespace1); - catalog.getStreams().forEach(stream -> stream.setNamespace(namespace1)); - - final var diffNamespaceStreams = new ArrayList(); - final var namespace2 = TestingNamespaces.generate("diff_source_namespace"); - TEST_SCHEMAS.add(namespace2); - final var mapper = MoreMappers.initMapper(); - for (final AirbyteStream stream : catalog.getStreams()) { - final var clonedStream = mapper.readValue(mapper.writeValueAsString(stream), - AirbyteStream.class); - clonedStream.setNamespace(namespace2); - diffNamespaceStreams.add(clonedStream); - } - catalog.getStreams().addAll(diffNamespaceStreams); - - final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog); - final var messageFile = DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion()); - final var ns1Messages = MoreResources.readResource(messageFile).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final var ns1MessagesAtNamespace1 = getRecordMessagesWithNewNamespace(ns1Messages, namespace1); - final var ns2Messages = MoreResources.readResource(messageFile).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final var ns2MessagesAtNamespace2 = getRecordMessagesWithNewNamespace(ns2Messages, namespace2); - - final var allMessages = new ArrayList<>(ns1MessagesAtNamespace1); - allMessages.addAll(ns2MessagesAtNamespace2); - - final JsonNode config = getConfig(); - final String defaultSchema = getDefaultSchema(config); - 
runSyncAndVerifyStateOutput(config, allMessages, configuredCatalog, false); - retrieveRawRecordsAndAssertSameMessages(catalog, allMessages, defaultSchema); - } - - /** - * The goal of this test is to verify the expected conversions of a namespace as it appears in the - * catalog to how it appears in the destination. Each database has its own rules, so this test runs - * through several "edge" case sorts of names and checks the behavior. - * - * @param testCaseId - the id of each test case in namespace_test_cases.json so that we can handle - * an individual case specially for a specific database. - * @param namespaceInCatalog - namespace as it would appear in the catalog - * @param namespaceInDst - namespace as we would expect it to appear in the destination (this may be - * overridden for different databases). - * @throws Exception - broad catch of exception to hydrate log information with additional test case - * context. - */ - @ParameterizedTest - @ArgumentsSource(NamespaceTestCaseProvider.class) - public void testNamespaces(final String testCaseId, - final String namespaceInCatalog, - final String namespaceInDst) - throws Exception { - final Optional nameTransformer = getNameTransformer(); - nameTransformer.ifPresent( - namingConventionTransformer -> assertNamespaceNormalization(testCaseId, - namespaceInDst, - namingConventionTransformer.getNamespace(namespaceInCatalog))); - - if (!implementsNamespaces() || !supportNamespaceTest()) { - return; - } - - final AirbyteCatalog catalog = Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.NAMESPACE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - catalog.getStreams().forEach(stream -> stream.setNamespace(namespaceInCatalog)); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - - final List messages = MoreResources.readResource( - DataArgumentsProvider.NAMESPACE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final List messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespaceInCatalog); - - final JsonNode config = getConfig(); - try { - runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false); - // Add to the list of schemas to clean up. - TEST_SCHEMAS.add(namespaceInCatalog); - } catch (final Exception e) { - throw new IOException(String.format( - "[Test Case %s] Destination failed to sync data to namespace %s, see \"namespace_test_cases.json for details\"", - testCaseId, namespaceInCatalog), e); - } - } - - /** - * In order to launch a source on Kubernetes in a pod, we need to be able to wrap the entrypoint. - * The source connector must specify its entrypoint in the AIRBYTE_ENTRYPOINT variable. This test - * ensures that the entrypoint environment variable is set. - */ - @Test - public void testEntrypointEnvVar() throws Exception { - final String entrypoint = EntrypointEnvChecker.getEntrypointEnvVariable( - processFactory, - JOB_ID, - JOB_ATTEMPT, - jobRoot, - getImageName()); - - assertNotNull(entrypoint); - assertFalse(entrypoint.isBlank()); - } - - /** - * Verify that destination doesn't fail if new fields arrive in the data after initial schema - * discovery and sync. 
- * - * @throws Exception - */ - @Test - public void testSyncNotFailsWithNewFields() throws Exception { - if (!implementsOverwrite()) { - LOGGER.info("Destination's spec.json does not support overwrite sync mode."); - return; - } - - final AirbyteCatalog catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(getProtocolVersion())), - AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog); - - final List firstSyncMessages = MoreResources.readResource( - DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion())).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)) - .collect(Collectors.toList()); - final JsonNode config = getConfig(); - runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false); - final var stream = catalog.getStreams().get(0); - - // Run second sync with new fields on the message - final List secondSyncMessagesWithNewFields = Lists.newArrayList( - new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(stream.getName()) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() - .put("id", 1) - .put("currency", "USD") - .put("date", "2020-03-31T00:00:00Z") - .put("newFieldString", "Value for new field") - .put("newFieldNumber", 3) - .put("HKD", 10.1) - .put("NZD", 700.1) - .build()))), - new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))); - - // Run sync and verify that all message were written without failing - runSyncAndVerifyStateOutput(config, secondSyncMessagesWithNewFields, configuredCatalog, false); - final var destinationOutput = retrieveRecords(testEnv, stream.getName(), - getDefaultSchema(config), stream.getJsonSchema()); - // Remove state message - secondSyncMessagesWithNewFields.removeIf( - airbyteMessage -> airbyteMessage.getType().equals(Type.STATE)); - assertEquals(secondSyncMessagesWithNewFields.size(), destinationOutput.size()); - } - - /** - * Whether the destination should be tested against different namespaces. - */ - protected boolean supportNamespaceTest() { - return false; - } - - /** - * Set up the name transformer used by a destination to test it against a variety of namespaces. - */ - protected Optional getNameTransformer() { - return Optional.empty(); - } - - /** - * Override this method if the normalized namespace is different from the default one. E.g. BigQuery - * does allow a name starting with a number. So it should change the expected normalized namespace - * when testCaseId = "S3A-1". Find the testCaseId in "namespace_test_cases.json". 
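A destination with its own normalization rules can override the hook defined just below. A sketch, assuming a hypothetical destination that keeps a leading digit rather than prefixing it away (the expected value "99namespace" is illustrative only; the real expectation depends on the destination's naming rules):

    @Override
    protected void assertNamespaceNormalization(final String testCaseId,
                                                final String expectedNormalizedNamespace,
                                                final String actualNormalizedNamespace) {
      if ("S3A-1".equals(testCaseId)) {
        // Hypothetical rule: a namespace may start with a number, so the default expectation is replaced.
        assertEquals("99namespace", actualNormalizedNamespace);
      } else {
        super.assertNamespaceNormalization(testCaseId, expectedNormalizedNamespace, actualNormalizedNamespace);
      }
    }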
- */ - protected void assertNamespaceNormalization(final String testCaseId, - final String expectedNormalizedNamespace, - final String actualNormalizedNamespace) { - assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, - String.format( - "Test case %s failed; if this is expected, please override assertNamespaceNormalization", - testCaseId)); - } - - private ConnectorSpecification runSpec() throws TestHarnessException { - return convertProtocolObject( - new DefaultGetSpecTestHarness( - new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null, null, false, new EnvVariableFeatureFlags())) - .run(new JobGetSpecConfig().withDockerImage(getImageName()), jobRoot).getSpec(), - ConnectorSpecification.class); - } - - protected StandardCheckConnectionOutput runCheck(final JsonNode config) throws TestHarnessException { - return new DefaultCheckConnectionTestHarness( - new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null, null, false, new EnvVariableFeatureFlags()), - mConnectorConfigUpdater) - .run(new StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) - .getCheckConnection(); - } - - protected StandardCheckConnectionOutput.Status runCheckWithCatchedException( - final JsonNode config) { - try { - final StandardCheckConnectionOutput standardCheckConnectionOutput = new DefaultCheckConnectionTestHarness( - new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null, null, false, new EnvVariableFeatureFlags()), - mConnectorConfigUpdater) - .run(new StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) - .getCheckConnection(); - return standardCheckConnectionOutput.getStatus(); - } catch (final Exception e) { - LOGGER.error("Failed to check connection:" + e.getMessage()); - } - return Status.FAILED; - } - - protected AirbyteDestination getDestination() { - return new DefaultAirbyteDestination( - new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null, null, false, new EnvVariableFeatureFlags())); - } - - protected void runSyncAndVerifyStateOutput(final JsonNode config, - final List messages, - final ConfiguredAirbyteCatalog catalog, - final boolean runNormalization) - throws Exception { - final List destinationOutput = runSync(config, messages, catalog, - runNormalization); - - final AirbyteMessage expectedStateMessage = reversed(messages) - .stream() - .filter(m -> m.getType() == Type.STATE) - .findFirst() - .orElseThrow(() -> new IllegalArgumentException( - "All message sets used for testing should include a state record")); - - Collections.reverse(destinationOutput); - final AirbyteMessage actualStateMessage = destinationOutput - .stream() - .filter(m -> m.getType() == Type.STATE) - .findFirst() - .map(msg -> { - // Modify state message to remove destination stats. - final AirbyteStateMessage clone = msg.getState(); - clone.setDestinationStats(null); - msg.setState(clone); - return msg; - }) - .orElseGet(() -> { - fail("Destination failed to output state"); - return null; - }); - - assertEquals(expectedStateMessage, actualStateMessage); - } - - /** - * Reverses a list by creating a new list with the same elements of the input list and then - * reversing it. The input list will not be altered. - * - * @param list to reverse - * @param type - * @return new list with elements of original reversed. 
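For instance, a quick check of the copy-then-reverse contract described above:

    final List<Integer> input = new ArrayList<>(List.of(1, 2, 3));
    final List<Integer> output = reversed(input);
    // output is [3, 2, 1]; input is still [1, 2, 3], because only the copy is reversed.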
- */ - public static List reversed(final List list) { - final ArrayList reversed = new ArrayList<>(list); - Collections.reverse(reversed); - return reversed; - } - - private List runSync( - final JsonNode config, - final List messages, - final ConfiguredAirbyteCatalog catalog, - final boolean runNormalization) - throws Exception { - - final WorkerDestinationConfig destinationConfig = new WorkerDestinationConfig() - .withConnectionId(UUID.randomUUID()) - .withCatalog(convertProtocolObject(catalog, io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class)) - .withDestinationConnectionConfiguration(config); - - final AirbyteDestination destination = getDestination(); - - destination.start(destinationConfig, jobRoot, inDestinationNormalizationFlags(runNormalization)); - messages.forEach( - message -> Exceptions.toRuntime(() -> destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); - destination.notifyEndOfInput(); - - final List destinationOutput = new ArrayList<>(); - while (!destination.isFinished()) { - destination.attemptRead().ifPresent(m -> destinationOutput.add(convertProtocolObject(m, AirbyteMessage.class))); - } - - destination.close(); - - if (!runNormalization || (supportsInDestinationNormalization())) { - return destinationOutput; - } - - final NormalizationRunner runner = new DefaultNormalizationRunner( - processFactory, - getNormalizationImageName(), - getNormalizationIntegrationType()); - runner.start(); - final Path normalizationRoot = Files.createDirectories(jobRoot.resolve("normalize")); - if (!runner.normalize(JOB_ID, JOB_ATTEMPT, normalizationRoot, - destinationConfig.getDestinationConnectionConfiguration(), - destinationConfig.getCatalog(), null)) { - throw new TestHarnessException("Normalization Failed."); - } - runner.close(); - return destinationOutput; - } - - protected void retrieveRawRecordsAndAssertSameMessages(final AirbyteCatalog catalog, - final List messages, - final String defaultSchema) - throws Exception { - final List actualMessages = new ArrayList<>(); - for (final AirbyteStream stream : catalog.getStreams()) { - final String streamName = stream.getName(); - final String schema = stream.getNamespace() != null ? stream.getNamespace() : defaultSchema; - final List msgList = retrieveRecords(testEnv, streamName, schema, - stream.getJsonSchema()) - .stream() - .map(data -> new AirbyteRecordMessage().withStream(streamName).withNamespace(schema) - .withData(data)) - .toList(); - actualMessages.addAll(msgList); - } - - assertSameMessages(messages, actualMessages, false); - } - - // ignores emitted at. - protected void assertSameMessages(final List expected, - final List actual, - final boolean pruneAirbyteInternalFields) { - final List expectedProcessed = expected.stream() - .filter(message -> message.getType() == AirbyteMessage.Type.RECORD) - .map(AirbyteMessage::getRecord) - .peek(recordMessage -> recordMessage.setEmittedAt(null)) - .map(recordMessage -> pruneAirbyteInternalFields ? safePrune(recordMessage) : recordMessage) - .map(AirbyteRecordMessage::getData) - .collect(Collectors.toList()); - - final List actualProcessed = actual.stream() - .map(recordMessage -> pruneAirbyteInternalFields ? 
safePrune(recordMessage) : recordMessage) - .map(AirbyteRecordMessage::getData) - .collect(Collectors.toList()); - - testDataComparator.assertSameData(expectedProcessed, actualProcessed); - } - - protected List retrieveNormalizedRecords(final AirbyteCatalog catalog, - final String defaultSchema) - throws Exception { - final List actualMessages = new ArrayList<>(); - - for (final AirbyteStream stream : catalog.getStreams()) { - final String streamName = stream.getName(); - - final List msgList = retrieveNormalizedRecords(testEnv, streamName, - defaultSchema) - .stream() - .map(data -> new AirbyteRecordMessage().withStream(streamName).withData(data)).toList(); - actualMessages.addAll(msgList); - } - return actualMessages; - } - - /** - * Same as {@link #pruneMutate(JsonNode)}, except does a defensive copy and returns a new json node - * object instead of mutating in place. - * - * @param record - record that will be pruned. - * @return pruned json node. - */ - private static AirbyteRecordMessage safePrune(final AirbyteRecordMessage record) { - final AirbyteRecordMessage clone = Jsons.clone(record); - pruneMutate(clone.getData()); - return clone; - } - - /** - * Prune fields that are added internally by airbyte and are not part of the original data. Used so - * that we can compare data that is persisted by an Airbyte worker to the original data. This method - * mutates the provided json in place. - * - * @param json - json that will be pruned. will be mutated in place! - */ - private static void pruneMutate(final JsonNode json) { - for (final String key : Jsons.keys(json)) { - final JsonNode node = json.get(key); - // recursively prune all airbyte internal fields. - if (node.isObject() || node.isArray()) { - pruneMutate(node); - } - - // prune the following - // - airbyte internal fields - // - fields that match what airbyte generates as hash ids - // - null values -- normalization will often return `: null` but in the original data that key - // likely did not exist in the original message. the most consistent thing to do is always remove - // the null fields (this choice does decrease our ability to check that normalization creates - // columns even if all the values in that column are null) - final HashSet airbyteInternalFields = Sets.newHashSet( - "emitted_at", - "ab_id", - "normalized_at", - "EMITTED_AT", - "AB_ID", - "NORMALIZED_AT", - "HASHID", - "unique_key", - "UNIQUE_KEY"); - if (airbyteInternalFields.stream() - .anyMatch(internalField -> key.toLowerCase().contains(internalField.toLowerCase())) - || json.get(key).isNull()) { - ((ObjectNode) json).remove(key); - } - } - } - - public static class TestDestinationEnv { - - private final Path localRoot; - - public TestDestinationEnv(final Path localRoot) { - this.localRoot = localRoot; - } - - public Path getLocalRoot() { - return localRoot; - } - - @Override - public String toString() { - return "TestDestinationEnv{" + - "localRoot=" + localRoot + - '}'; - } - - } - - /** - * This test MUST be disabled by default, but you may uncomment it and use when need to reproduce a - * performance issue for destination. This test helps you to emulate lot's of stream and messages in - * each simply changing the "streamsSize" args to set a number of tables\streams and the - * "messagesNumber" to a messages number that would be written in each stream. !!! Do NOT forget to - * manually remove all generated objects !!! Hint: To check the destination container output run - * "docker ps" command in console to find the container's id. 
Then run "docker container attach - * your_containers_id" (ex. docker container attach 18cc929f44c8) to see the container's output - */ - @Test - @Disabled - public void testStressPerformance() throws Exception { - final int streamsSize = 5; // number of generated streams - final int messagesNumber = 300; // number of msg to be written to each generated stream - - // Each stream will have an id and name fields - final String USERS_STREAM_NAME = "users"; // stream's name prefix. Will get "user0", "user1", etc. - final String ID = "id"; - final String NAME = "name"; - - // generate schema\catalogs - final List configuredAirbyteStreams = new ArrayList<>(); - for (int i = 0; i < streamsSize; i++) { - configuredAirbyteStreams - .add(CatalogHelpers.createAirbyteStream(USERS_STREAM_NAME + i, - Field.of(NAME, JsonSchemaType.STRING), - Field - .of(ID, JsonSchemaType.STRING))); - } - final AirbyteCatalog testCatalog = new AirbyteCatalog().withStreams(configuredAirbyteStreams); - final ConfiguredAirbyteCatalog configuredTestCatalog = CatalogHelpers - .toDefaultConfiguredCatalog(testCatalog); - - final JsonNode config = getConfig(); - final WorkerDestinationConfig destinationConfig = new WorkerDestinationConfig() - .withConnectionId(UUID.randomUUID()) - .withCatalog(convertProtocolObject(configuredTestCatalog, io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class)) - .withDestinationConnectionConfiguration(config); - final AirbyteDestination destination = getDestination(); - - // Start destination - destination.start(destinationConfig, jobRoot, Collections.emptyMap()); - - final AtomicInteger currentStreamNumber = new AtomicInteger(0); - final AtomicInteger currentRecordNumberForStream = new AtomicInteger(0); - - // this is just a current state logger. Useful when running long hours tests to see the progress - final Thread countPrinter = new Thread(() -> { - while (true) { - System.out.println( - "currentStreamNumber=" + currentStreamNumber + ", currentRecordNumberForStream=" - + currentRecordNumberForStream + ", " + Instant.now()); - try { - Thread.sleep(10000); - } catch (final InterruptedException e) { - e.printStackTrace(); - } - } - - }); - countPrinter.start(); - - // iterate through streams - for (int streamCounter = 0; streamCounter < streamsSize; streamCounter++) { - LOGGER.info("Started new stream processing with #" + streamCounter); - // iterate through msm inside a particular stream - // Generate messages and put it to stream - for (int msgCounter = 0; msgCounter < messagesNumber; msgCounter++) { - final AirbyteMessage msg = new AirbyteMessage() - .withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME + streamCounter) - .withData( - Jsons.jsonNode( - ImmutableMap.builder().put(NAME, LOREM_IPSUM) - .put(ID, streamCounter + "_" + msgCounter) - .build())) - .withEmittedAt(Instant.now().toEpochMilli())); - try { - destination.accept(convertProtocolObject(msg, io.airbyte.protocol.models.AirbyteMessage.class)); - } catch (final Exception e) { - LOGGER.error("Failed to write a RECORD message: " + e); - throw new RuntimeException(e); - } - - currentRecordNumberForStream.set(msgCounter); - } - - // send state message here, it's required - final AirbyteMessage msgState = new AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage() - .withData( - Jsons.jsonNode(ImmutableMap.builder().put("start_date", "2020-09-02").build()))); - try { - destination.accept(convertProtocolObject(msgState, 
io.airbyte.protocol.models.AirbyteMessage.class)); - } catch (final Exception e) { - LOGGER.error("Failed to write a STATE message: " + e); - throw new RuntimeException(e); - } - - currentStreamNumber.set(streamCounter); - } - - LOGGER.info(String - .format("Added %s messages to each of %s streams", currentRecordNumberForStream, - currentStreamNumber)); - // Close destination - destination.notifyEndOfInput(); - } - - private final static String LOREM_IPSUM = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque malesuada lacinia aliquet. Nam feugiat mauris vel magna dignissim feugiat. Nam non dapibus sapien, ac mattis purus. Donec mollis libero erat, a rutrum ipsum pretium id. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Integer nec aliquam leo. Aliquam eu dictum augue, a ornare elit.\n" - + "\n" - + "Nulla viverra blandit neque. Nam blandit varius efficitur. Nunc at sapien blandit, malesuada lectus vel, tincidunt orci. Proin blandit metus eget libero facilisis interdum. Aenean luctus scelerisque orci, at scelerisque sem vestibulum in. Nullam ornare massa sed dui efficitur, eget volutpat lectus elementum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Integer elementum mi vitae erat eleifend iaculis. Nullam eget tincidunt est, eget tempor est. Sed risus velit, iaculis vitae est in, volutpat consectetur odio. Aenean ut fringilla elit. Suspendisse non aliquet massa. Curabitur suscipit metus nunc, nec porttitor velit venenatis vel. Fusce vestibulum eleifend diam, lobortis auctor magna.\n" - + "\n" - + "Etiam maximus, mi feugiat pharetra mattis, nulla neque euismod metus, in congue nunc sem nec ligula. Curabitur aliquam, risus id convallis cursus, nunc orci sollicitudin enim, quis scelerisque nibh dui in ipsum. Suspendisse mollis, metus a dapibus scelerisque, sapien nulla pretium ipsum, non finibus sem orci et lectus. Aliquam dictum magna nisi, a consectetur urna euismod nec. In pulvinar facilisis nulla, id mollis libero pulvinar vel. Nam a commodo leo, eu commodo dolor. In hac habitasse platea dictumst. Curabitur auctor purus quis tortor laoreet efficitur. Quisque tincidunt, risus vel rutrum fermentum, libero urna dignissim augue, eget pulvinar nibh ligula ut tortor. Vivamus convallis non risus sed consectetur. Etiam accumsan enim ac nisl suscipit, vel congue lorem volutpat. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce non orci quis lacus rhoncus vestibulum nec ut magna. In varius lectus nec quam posuere finibus. Vivamus quis lectus vitae tortor sollicitudin fermentum.\n" - + "\n" - + "Pellentesque elementum vehicula egestas. Sed volutpat velit arcu, at imperdiet sapien consectetur facilisis. Suspendisse porttitor tincidunt interdum. Morbi gravida faucibus tortor, ut rutrum magna tincidunt a. Morbi eu nisi eget dui finibus hendrerit sit amet in augue. Aenean imperdiet lacus enim, a volutpat nulla placerat at. Suspendisse nibh ipsum, venenatis vel maximus ut, fringilla nec felis. Sed risus mi, egestas quis quam ullamcorper, pharetra vestibulum diam.\n" - + "\n" - + "Praesent finibus scelerisque elit, accumsan condimentum risus mattis vitae. Donec tristique hendrerit facilisis. Curabitur metus purus, venenatis non elementum id, finibus eu augue. Quisque posuere rhoncus ligula, et vehicula erat pulvinar at. Pellentesque vel quam vel lectus tincidunt congue quis id sapien. Ut efficitur mauris vitae pretium iaculis. Aliquam consectetur iaculis nisi vitae laoreet. 
Integer vel odio quis diam mattis tempor eget nec est. Donec iaculis facilisis neque, at dictum magna vestibulum ut. Sed malesuada non nunc ac consequat. Maecenas tempus lectus a nisl congue, ac venenatis diam viverra. Nam ac justo id nulla iaculis lobortis in eu ligula. Vivamus et ligula id sapien efficitur aliquet. Curabitur est justo, tempus vitae mollis quis, tincidunt vitae felis. Vestibulum molestie laoreet justo, nec mollis purus vulputate at."; - - protected TestDataComparator getTestDataComparator() { - return new BasicTestDataComparator(this::resolveIdentifier); - } - - protected boolean supportBasicDataTypeTest() { - return false; - } - - protected boolean supportArrayDataTypeTest() { - return false; - } - - protected boolean supportObjectDataTypeTest() { - return false; - } - - protected boolean supportIncrementalSchemaChanges() { - return false; - } - - /** - * NaN and Infinity test are not supported by default. Please override this method to specify - * NaN/Infinity types support example: - * - *
-   * <pre>
-   *
-   * protected SpecialNumericTypes getSpecialNumericTypesSupportTest() {
-   *   return SpecialNumericTypes.builder()
-   *       .supportNumberNan(true)
-   *       .supportIntegerNan(true)
-   *       .build();
-   * }
-   * </pre>
-   *
-   * @return SpecialNumericTypes with support flags
-   */
-  protected static SpecialNumericTypes getSpecialNumericTypesSupportTest() {
-    return SpecialNumericTypes.builder().build();
-  }
-
-  /**
-   * This method should be overridden if the destination connector supports a newer protocol version;
-   * otherwise {@link io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion#V0} is used.
-   * <p>
-   * NOTE: This method must be public for the sake of Java reflection.
-   *
-   * @return the protocol version supported by the connector
-   */
-  public ProtocolVersion getProtocolVersion() {
-    return ProtocolVersion.V0;
-  }
-
-  private boolean checkTestCompatibility(
-                                         final DataTypeTestArgumentProvider.TestCompatibility testCompatibility) {
-    return testCompatibility.isTestCompatible(supportBasicDataTypeTest(),
-        supportArrayDataTypeTest(), supportObjectDataTypeTest());
-  }
-
-  @ParameterizedTest
-  @ArgumentsSource(DataTypeTestArgumentProvider.class)
-  public void testDataTypeTestWithNormalization(final String messagesFilename,
-                                                final String catalogFilename,
-                                                final DataTypeTestArgumentProvider.TestCompatibility testCompatibility)
-      throws Exception {
-    if (!checkTestCompatibility(testCompatibility)) {
-      return;
-    }
-
-    final AirbyteCatalog catalog = readCatalogFromFile(catalogFilename);
-    final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-        catalog);
-    final List<AirbyteMessage> messages = readMessagesFromFile(messagesFilename);
-
-    runAndCheck(catalog, configuredCatalog, messages);
-  }
-
-  @Test
-  public void testSyncNumberNanDataType() throws Exception {
-    // NaN/Infinity support requires protocol version V1 or higher.
-    final SpecialNumericTypes numericTypesSupport = getSpecialNumericTypesSupportTest();
-    if (getProtocolVersion().equals(ProtocolVersion.V0) || !numericTypesSupport.isSupportNumberNan()) {
-      return;
-    }
-    final AirbyteCatalog catalog = readCatalogFromFile(prefixFileNameByVersion(NUMBER_TYPE_CATALOG, getProtocolVersion()));
-    final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
-    final List<AirbyteMessage> messages = readMessagesFromFile(prefixFileNameByVersion(NAN_TYPE_MESSAGE, getProtocolVersion()));
-    final JsonNode config = getConfig();
-    final String defaultSchema = getDefaultSchema(config);
-
-    runAndCheck(catalog, configuredCatalog, messages);
-  }
-
-  @Test
-  public void testSyncIntegerNanDataType() throws Exception {
-    // NaN/Infinity support requires protocol version V1 or higher.
-    final SpecialNumericTypes numericTypesSupport = getSpecialNumericTypesSupportTest();
-    if (getProtocolVersion().equals(ProtocolVersion.V0) || !numericTypesSupport.isSupportIntegerNan()) {
-      return;
-    }
-    final AirbyteCatalog catalog = readCatalogFromFile(prefixFileNameByVersion(INTEGER_TYPE_CATALOG, getProtocolVersion()));
-    final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
-    final List<AirbyteMessage> messages = readMessagesFromFile(prefixFileNameByVersion(NAN_TYPE_MESSAGE, getProtocolVersion()));
-    final JsonNode config = getConfig();
-    final String defaultSchema = getDefaultSchema(config);
-
-    runAndCheck(catalog, configuredCatalog, messages);
-  }
-
-  @Test
-  public void testSyncNumberInfinityDataType() throws Exception {
-    // NaN/Infinity support requires protocol version V1 or higher.
-    final SpecialNumericTypes numericTypesSupport = getSpecialNumericTypesSupportTest();
-    if (getProtocolVersion().equals(ProtocolVersion.V0) || !numericTypesSupport.isSupportNumberInfinity()) {
-      return;
-    }
-    final AirbyteCatalog catalog = readCatalogFromFile(prefixFileNameByVersion(NUMBER_TYPE_CATALOG, getProtocolVersion()));
-    final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
-    final List<AirbyteMessage> messages = readMessagesFromFile(prefixFileNameByVersion(INFINITY_TYPE_MESSAGE, getProtocolVersion()));
-    final JsonNode config = getConfig();
-    final String defaultSchema = getDefaultSchema(config);
-
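The catalog and message file names used in these tests are resolved per protocol version by ArgumentProviderUtil.prefixFileNameByVersion, whose definition appears later in this diff. A quick illustration of the mapping, assuming the constants and static imports used by this class:

    // "data_type_number_type_test_catalog.json" -> "v1/data_type_number_type_test_catalog.json"
    final String resolved = prefixFileNameByVersion(NUMBER_TYPE_CATALOG, ProtocolVersion.V1);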
-    runAndCheck(catalog, configuredCatalog, messages);
-  }
-
-  @Test
-  public void testSyncIntegerInfinityDataType() throws Exception {
-    // NaN/Infinity support requires protocol version V1 or higher.
-    final SpecialNumericTypes numericTypesSupport = getSpecialNumericTypesSupportTest();
-    if (getProtocolVersion().equals(ProtocolVersion.V0) || !numericTypesSupport.isSupportIntegerInfinity()) {
-      return;
-    }
-    final AirbyteCatalog catalog = readCatalogFromFile(prefixFileNameByVersion(INTEGER_TYPE_CATALOG, getProtocolVersion()));
-    final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
-    final List<AirbyteMessage> messages = readMessagesFromFile(prefixFileNameByVersion(INFINITY_TYPE_MESSAGE, getProtocolVersion()));
-    final JsonNode config = getConfig();
-    final String defaultSchema = getDefaultSchema(config);
-
-    runAndCheck(catalog, configuredCatalog, messages);
-  }
-
-  private void runAndCheck(final AirbyteCatalog catalog, final ConfiguredAirbyteCatalog configuredCatalog, final List<AirbyteMessage> messages)
-      throws Exception {
-    if (normalizationFromDefinition()) {
-      LOGGER.info("Normalization is supported! Running the test with normalization.");
-      runAndCheckWithNormalization(messages, configuredCatalog, catalog);
-    } else {
-      LOGGER.info("Normalization is not supported! Running the test without normalization.");
-      runAndCheckWithoutNormalization(messages, configuredCatalog, catalog);
-    }
-  }
-
-  private static AirbyteCatalog readCatalogFromFile(final String catalogFilename) throws IOException {
-    return Jsons.deserialize(MoreResources.readResource(catalogFilename), AirbyteCatalog.class);
-  }
-
-  private static List<AirbyteMessage> readMessagesFromFile(final String messagesFilename)
-      throws IOException {
-    return MoreResources.readResource(messagesFilename).lines()
-        .map(record -> Jsons.deserialize(record, AirbyteMessage.class))
-        .collect(Collectors.toList());
-  }
-
-  private void runAndCheckWithNormalization(final List<AirbyteMessage> messages,
-                                            final ConfiguredAirbyteCatalog configuredCatalog,
-                                            final AirbyteCatalog catalog)
-      throws Exception {
-    final JsonNode config = getConfig();
-    runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true);
-
-    final List<AirbyteRecordMessage> actualMessages = retrieveNormalizedRecords(catalog,
-        getDefaultSchema(config));
-    assertSameMessages(messages, actualMessages, true);
-  }
-
-  private void runAndCheckWithoutNormalization(final List<AirbyteMessage> messages,
-                                               final ConfiguredAirbyteCatalog configuredCatalog,
-                                               final AirbyteCatalog catalog)
-      throws Exception {
-    final JsonNode config = getConfig();
-    runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false);
-    retrieveRawRecordsAndAssertSameMessages(catalog, messages, getDefaultSchema(config));
-  }
-
-  /**
-   * Mutates the namespace of the input Airbyte record messages.
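The helper below mutates the messages it is given and returns the same list. If caller isolation ever matters, a non-mutating variant along these lines would also work (a sketch reusing the Jsons.clone helper already used by safePrune above):

    private static List<AirbyteMessage> recordMessagesWithNamespace(final List<AirbyteMessage> messages,
                                                                    final String namespace) {
      return messages.stream()
          .map(Jsons::clone) // defensive copy, so the caller's messages keep their original namespace
          .peek(message -> {
            if (message.getRecord() != null) {
              message.getRecord().setNamespace(namespace);
            }
          })
          .collect(Collectors.toList());
    }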
- */ - private static List getRecordMessagesWithNewNamespace( - final List airbyteMessages, - final String namespace) { - airbyteMessages.forEach(message -> { - if (message.getRecord() != null) { - message.getRecord().setNamespace(namespace); - } - }); - return airbyteMessages; - } - - /** - * Can be used in overridden {@link #getSpecialNumericTypesSupportTest() - * getSpecialNumericTypesSupportTest()} method to specify if connector supports Integer/Number NaN - * or Integer/Number Infinity types - */ - @Builder - @Getter - public static class SpecialNumericTypes { - - @Builder.Default - boolean supportIntegerNan = false; - @Builder.Default - boolean supportNumberNan = false; - @Builder.Default - boolean supportIntegerInfinity = false; - @Builder.Default - boolean supportNumberInfinity = false; - - } - - public static class NamespaceTestCaseProvider implements ArgumentsProvider { - - public static final String NAMESPACE_TEST_CASES_JSON = "namespace_test_cases.json"; - - @Override - public Stream provideArguments(final ExtensionContext context) - throws Exception { - final JsonNode testCases = - Jsons.deserialize(MoreResources.readResource(NAMESPACE_TEST_CASES_JSON)); - return MoreIterators.toList(testCases.elements()).stream() - .filter(testCase -> testCase.get("enabled").asBoolean()) - .map(testCase -> { - final String namespaceInCatalog = TestingNamespaces.generate(testCase.get("namespace").asText()); - final String namespaceInDst = TestingNamespaces - .generateFromOriginal(namespaceInCatalog, testCase.get("namespace").asText(), testCase.get("normalized").asText()); - return Arguments.of( - testCase.get("id").asText(), - // Add uniqueness to namespace to avoid collisions between tests. - namespaceInCatalog, - namespaceInDst); - }); - } - - } - - private boolean supportsNormalization() { - return supportsInDestinationNormalization() || normalizationFromDefinition(); - } - - private static V0 convertProtocolObject(final V1 v1, final Class klass) { - return Jsons.object(Jsons.jsonNode(v1), klass); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.java deleted file mode 100644 index a22bb3d2a2e8..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.commons.json.Jsons; - -public class DestinationAcceptanceTestUtils { - - public static void putStringIntoJson(String stringValue, String fieldName, ObjectNode node) { - if (stringValue != null && (stringValue.startsWith("[") && stringValue.endsWith("]") - || stringValue.startsWith("{") && stringValue.endsWith("}"))) { - node.set(fieldName, Jsons.deserialize(stringValue)); - } else { - node.put(fieldName, stringValue); - } - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.java deleted file mode 100644 index f2c0c6b1e750..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; -import java.util.Arrays; -import java.util.Optional; -import java.util.function.Function; -import org.jooq.Record; - -public abstract class JdbcDestinationAcceptanceTest extends DestinationAcceptanceTest { - - protected final ObjectMapper mapper = new ObjectMapper(); - - protected JsonNode getJsonFromRecord(final Record record) { - return getJsonFromRecord(record, x -> Optional.empty()); - } - - protected JsonNode getJsonFromRecord(final Record record, final Function> valueParser) { - final ObjectNode node = mapper.createObjectNode(); - - Arrays.stream(record.fields()).forEach(field -> { - final var value = record.get(field); - - final Optional parsedValue = valueParser.apply(value); - if (parsedValue.isPresent()) { - node.put(field.getName(), parsedValue.get()); - } else { - switch (field.getDataType().getTypeName()) { - case "varchar", "nvarchar", "jsonb", "json", "other": - final var stringValue = (value != null ? value.toString() : null); - DestinationAcceptanceTestUtils.putStringIntoJson(stringValue, field.getName(), node); - break; - default: - node.put(field.getName(), (value != null ? value.toString() : null)); - } - } - }); - return node; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.java deleted file mode 100644 index 7e016559bf92..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; -import io.airbyte.cdk.integrations.base.Destination; -import io.airbyte.commons.json.Jsons; -import io.airbyte.configoss.WorkerDestinationConfig; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.workers.internal.AirbyteDestination; -import java.nio.file.Path; -import java.util.Map; -import java.util.Optional; - -/** - * Simple class to host a Destination in-memory rather than spinning up a container for it. For - * debugging and testing purposes only; not recommended to use this for real code - */ -public class LocalAirbyteDestination implements AirbyteDestination { - - private final Destination dest; - private AirbyteMessageConsumer consumer; - private boolean isClosed = false; - - public LocalAirbyteDestination(final Destination dest) { - this.dest = dest; - } - - @Override - public void start(final WorkerDestinationConfig destinationConfig, final Path jobRoot, final Map additionalEnvironmentVariables) - throws Exception { - consumer = - dest.getConsumer(destinationConfig.getDestinationConnectionConfiguration(), - Jsons.object(Jsons.jsonNode(destinationConfig.getCatalog()), io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog.class), - Destination::defaultOutputRecordCollector); - consumer.start(); - } - - @Override - public void accept(final AirbyteMessage message) throws Exception { - consumer.accept(Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.v0.AirbyteMessage.class)); - } - - @Override - public void notifyEndOfInput() { - // nothing to do here - } - - @Override - public void close() throws Exception { - consumer.close(); - isClosed = true; - } - - @Override - public void cancel() { - // nothing to do here - } - - @Override - public boolean isFinished() { - return isClosed; - } - - @Override - public int getExitValue() { - return 0; - } - - @Override - public Optional attemptRead() { - return Optional.empty(); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.java deleted file mode 100644 index 6ddf6876015a..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; -import io.airbyte.commons.json.Jsons; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import io.airbyte.protocol.models.v0.AirbyteMessage.Type; -import io.airbyte.protocol.models.v0.AirbyteStateMessage; -import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; -import io.airbyte.protocol.models.v0.AirbyteStreamState; -import io.airbyte.protocol.models.v0.StreamDescriptor; -import java.util.function.Consumer; -import org.junit.jupiter.api.Test; -import org.mockito.InOrder; -import org.mockito.Mockito; - -public abstract class PerStreamStateMessageTest { - - protected abstract Consumer getMockedConsumer(); - - protected abstract FailureTrackingAirbyteMessageConsumer getMessageConsumer(); - - @Test - void ensureAllStateMessageAreEmitted() throws Exception { - final AirbyteMessage airbyteMessage1 = AirbyteMessageCreator.createStreamStateMessage("name_one", "state_one"); - final AirbyteMessage airbyteMessage2 = AirbyteMessageCreator.createStreamStateMessage("name_two", "state_two"); - final AirbyteMessage airbyteMessage3 = AirbyteMessageCreator.createStreamStateMessage("name_three", "state_three"); - final FailureTrackingAirbyteMessageConsumer messageConsumer = getMessageConsumer(); - - messageConsumer.accept(airbyteMessage1); - messageConsumer.accept(airbyteMessage2); - messageConsumer.accept(airbyteMessage3); - - final Consumer mConsumer = getMockedConsumer(); - final InOrder inOrder = Mockito.inOrder(mConsumer); - - inOrder.verify(mConsumer).accept(airbyteMessage1); - inOrder.verify(mConsumer).accept(airbyteMessage2); - inOrder.verify(mConsumer).accept(airbyteMessage3); - } - - class AirbyteMessageCreator { - - public static AirbyteMessage createStreamStateMessage(final String name, final String value) { - return new AirbyteMessage() - .withType(Type.STATE) - .withState( - new AirbyteStateMessage() - .withType(AirbyteStateType.STREAM) - .withStream( - new AirbyteStreamState() - .withStreamDescriptor( - new StreamDescriptor() - .withName(name)) - .withStreamState(Jsons.jsonNode(value)))); - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.java deleted file mode 100644 index d95daa23dc56..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -public enum ProtocolVersion { - - V0("v0"), - V1("v1"); - - private final String prefix; - - ProtocolVersion(String prefix) { - this.prefix = prefix; - } - - public String getPrefix() { - return prefix; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.java deleted file mode 100644 index 37530ad8fcbe..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination; - -import java.time.Instant; -import java.time.LocalDate; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeParseException; -import java.time.temporal.ChronoUnit; -import java.util.Optional; -import org.apache.commons.lang3.RandomStringUtils; - -/** - * This class is used to generate unique namespaces for tests that follow a convention so that we - * can identify and delete old namespaces. Ideally tests would always clean up their own namespaces, - * but there are exception cases that can prevent that from happening. We want to be able to - * identify namespaces for which this has happened from their name, so we can take action. - *
-   * <p>
-   * The convention we follow is `<prefix>_test_YYYYMMDD_<5-character random suffix>`.
-   */
-public class TestingNamespaces {
-
-  private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd");
-  private static final int SUFFIX_LENGTH = 5;
-  public static final String STANDARD_PREFIX = "test_";
-
-  /**
-   * Generates a namespace that matches our testing namespace convention.
-   *
-   * @return convention-compliant namespace
-   */
-  public static String generate() {
-    return generate(null);
-  }
-
-  /**
-   * Generates a namespace that matches our testing namespace convention.
-   *
-   * @param prefix prefix to use for the namespace
-   * @return convention-compliant namespace
-   */
-  public static String generate(final String prefix) {
-    final String userDefinedPrefix = prefix != null ? prefix + "_" : "";
-    return userDefinedPrefix + STANDARD_PREFIX + FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC"))) + "_" + generateSuffix();
-  }
-
-  public static String generateFromOriginal(final String toOverwrite, final String oldPrefix, final String newPrefix) {
-    return toOverwrite.replace(oldPrefix, newPrefix);
-  }
-
-  /**
-   * Checks if a namespace is older than 2 days.
-   *
-   * @param namespace to check
-   * @return true if the namespace is older than 2 days, otherwise false
-   */
-  public static boolean isOlderThan2Days(final String namespace) {
-    return isOlderThan(namespace, 2, ChronoUnit.DAYS);
-  }
-
-  @SuppressWarnings("SameParameterValue")
-  private static boolean isOlderThan(final String namespace, final int timeMagnitude, final ChronoUnit timeUnit) {
-    return ifTestNamespaceGetDate(namespace)
-        .map(namespaceInstant -> namespaceInstant.isBefore(Instant.now().minus(timeMagnitude, timeUnit)))
-        .orElse(false);
-  }
-
-  private static Optional<Instant> ifTestNamespaceGetDate(final String namespace) {
-    final String[] parts = namespace.split("_");
-
-    if (parts.length < 3) {
-      return Optional.empty();
-    }
-
-    // need to re-add the _ since it gets pruned out by the split.
-    if (!STANDARD_PREFIX.equals(parts[parts.length - 3] + "_")) {
-      return Optional.empty();
-    }
-
-    return parseDateOrEmpty(parts[parts.length - 2]);
-  }
-
-  private static Optional<Instant> parseDateOrEmpty(final String dateCandidate) {
-    try {
-      return Optional.ofNullable(LocalDate.parse(dateCandidate, FORMATTER).atStartOfDay().toInstant(ZoneOffset.UTC));
-    } catch (final DateTimeParseException e) {
-      return Optional.empty();
-    }
-  }
-
-  private static String generateSuffix() {
-    return RandomStringUtils.randomAlphabetic(SUFFIX_LENGTH).toLowerCase();
-  }
-
-}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.java
deleted file mode 100644
index 991da1aed63b..000000000000
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.cdk.integrations.standardtest.destination.argproviders; - -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.getProtocolVersion; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.prefixFileNameByVersion; - -import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion; -import java.util.stream.Stream; -import org.junit.jupiter.api.extension.ExtensionContext; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.ArgumentsProvider; - -/** - * Class encapsulating all arguments required for Standard Destination Tests. - * - * All files defined here can be found in src/main/resources of this package. - */ -public class DataArgumentsProvider implements ArgumentsProvider { - - public static final CatalogMessageTestConfigPair EXCHANGE_RATE_CONFIG = - new CatalogMessageTestConfigPair("exchange_rate_catalog.json", "exchange_rate_messages.txt"); - public static final CatalogMessageTestConfigPair EDGE_CASE_CONFIG = - new CatalogMessageTestConfigPair("edge_case_catalog.json", "edge_case_messages.txt"); - public static final CatalogMessageTestConfigPair NAMESPACE_CONFIG = - new CatalogMessageTestConfigPair("namespace_catalog.json", "namespace_messages.txt"); - - @Override - public Stream provideArguments(final ExtensionContext context) throws Exception { - ProtocolVersion protocolVersion = getProtocolVersion(context); - return Stream.of( - Arguments.of(EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion), EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)), - Arguments.of(EDGE_CASE_CONFIG.getMessageFileVersion(protocolVersion), EDGE_CASE_CONFIG.getCatalogFileVersion(protocolVersion)) - // todo - need to use the new protocol to capture this. - // Arguments.of("stripe_messages.txt", "stripe_schema.json") - ); - - } - - public static class CatalogMessageTestConfigPair { - - final String catalogFile; - final String messageFile; - - public CatalogMessageTestConfigPair(final String catalogFile, final String messageFile) { - this.catalogFile = catalogFile; - this.messageFile = messageFile; - } - - public String getCatalogFileVersion(ProtocolVersion protocolVersion) { - return prefixFileNameByVersion(catalogFile, protocolVersion); - } - - public String getMessageFileVersion(ProtocolVersion protocolVersion) { - return prefixFileNameByVersion(messageFile, protocolVersion); - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.java deleted file mode 100644 index bb2bd81c9fb4..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination.argproviders; - -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.getProtocolVersion; - -import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion; -import java.util.stream.Stream; -import org.junit.jupiter.api.extension.ExtensionContext; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.ArgumentsProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DataTypeTestArgumentProvider implements ArgumentsProvider { - - private static final Logger LOGGER = LoggerFactory.getLogger(DataTypeTestArgumentProvider.class); - - public static final String INTEGER_TYPE_CATALOG = "data_type_integer_type_test_catalog.json"; - public static final String NUMBER_TYPE_CATALOG = "data_type_number_type_test_catalog.json"; - public static final String NAN_TYPE_MESSAGE = "nan_type_test_message.txt"; - public static final String INFINITY_TYPE_MESSAGE = "nan_type_test_message.txt"; - public static final CatalogMessageTestConfigWithCompatibility BASIC_TEST = - new CatalogMessageTestConfigWithCompatibility("data_type_basic_test_catalog.json", "data_type_basic_test_messages.txt", - new TestCompatibility(true, false, false)); - public static final CatalogMessageTestConfigWithCompatibility ARRAY_TEST = - new CatalogMessageTestConfigWithCompatibility("data_type_array_test_catalog.json", "data_type_array_test_messages.txt", - new TestCompatibility(true, true, false)); - public static final CatalogMessageTestConfigWithCompatibility OBJECT_TEST = - new CatalogMessageTestConfigWithCompatibility("data_type_object_test_catalog.json", "data_type_object_test_messages.txt", - new TestCompatibility(true, false, true)); - public static final CatalogMessageTestConfigWithCompatibility OBJECT_WITH_ARRAY_TEST = - new CatalogMessageTestConfigWithCompatibility("data_type_array_object_test_catalog.json", "data_type_array_object_test_messages.txt", - new TestCompatibility(true, true, true)); - private ProtocolVersion protocolVersion; - - @Override - public Stream provideArguments(ExtensionContext context) throws Exception { - protocolVersion = getProtocolVersion(context); - return Stream.of( - getArguments(BASIC_TEST), - getArguments(ARRAY_TEST), - getArguments(OBJECT_TEST), - getArguments(OBJECT_WITH_ARRAY_TEST)); - } - - private Arguments getArguments(CatalogMessageTestConfigWithCompatibility testConfig) { - return Arguments.of(testConfig.getMessageFileVersion(protocolVersion), testConfig.getCatalogFileVersion(protocolVersion), - testConfig.testCompatibility); - } - - public record TestCompatibility(boolean requireBasicCompatibility, - boolean requireArrayCompatibility, - boolean requireObjectCompatibility) { - - public boolean isTestCompatible(boolean supportBasicDataTypeTest, boolean supportArrayDataTypeTest, boolean supportObjectDataTypeTest) { - LOGGER.info("---- Data type test compatibility ----"); - LOGGER.info("| Data type test | Require | Support |"); - LOGGER.info("| Basic test | {} | {} |", (requireBasicCompatibility ? "true " : "false"), - (supportBasicDataTypeTest ? "true " : "false")); - LOGGER.info("| Array test | {} | {} |", (requireArrayCompatibility ? "true " : "false"), - (supportArrayDataTypeTest ? "true " : "false")); - LOGGER.info("| Object test | {} | {} |", (requireObjectCompatibility ? "true " : "false"), - (supportObjectDataTypeTest ? 
"true " : "false")); - LOGGER.info("--------------------------------------"); - - if (requireBasicCompatibility && !supportBasicDataTypeTest) { - LOGGER.warn("The destination doesn't support required Basic data type test. The test is skipped!"); - return false; - } - if (requireArrayCompatibility && !supportArrayDataTypeTest) { - LOGGER.warn("The destination doesn't support required Array data type test. The test is skipped!"); - return false; - } - if (requireObjectCompatibility && !supportObjectDataTypeTest) { - LOGGER.warn("The destination doesn't support required Object data type test. The test is skipped!"); - return false; - } - - return true; - } - - } - - public static class CatalogMessageTestConfigWithCompatibility extends DataArgumentsProvider.CatalogMessageTestConfigPair { - - final TestCompatibility testCompatibility; - - public CatalogMessageTestConfigWithCompatibility(String catalogFile, String messageFile, TestCompatibility testCompatibility) { - super(catalogFile, messageFile); - this.testCompatibility = testCompatibility; - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.java deleted file mode 100644 index 41de26d32b19..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination.argproviders; - -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.getProtocolVersion; -import static io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil.prefixFileNameByVersion; - -import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion; -import java.util.stream.Stream; -import org.junit.jupiter.api.extension.ExtensionContext; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.ArgumentsProvider; - -public class NumberDataTypeTestArgumentProvider implements ArgumentsProvider { - - public static final String NUMBER_DATA_TYPE_TEST_CATALOG = "number_data_type_test_catalog.json"; - public static final String NUMBER_DATA_TYPE_TEST_MESSAGES = "number_data_type_test_messages.txt"; - public static final String NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG = "number_data_type_array_test_catalog.json"; - public static final String NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES = "number_data_type_array_test_messages.txt"; - private ProtocolVersion protocolVersion; - - @Override - public Stream provideArguments(ExtensionContext context) throws Exception { - protocolVersion = getProtocolVersion(context); - return Stream.of( - getArguments(NUMBER_DATA_TYPE_TEST_CATALOG, NUMBER_DATA_TYPE_TEST_MESSAGES), - getArguments(NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG, NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES)); - } - - private Arguments getArguments(final String catalogFile, final String messageFile) { - return Arguments.of(prefixFileNameByVersion(catalogFile, protocolVersion), prefixFileNameByVersion(messageFile, protocolVersion)); - } - -} diff --git 
a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.java
deleted file mode 100644
index 23a8454add98..000000000000
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */
-
-package io.airbyte.cdk.integrations.standardtest.destination.argproviders.util;
-
-import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion;
-import java.lang.reflect.Method;
-import org.junit.jupiter.api.extension.ExtensionContext;
-
-public class ArgumentProviderUtil {
-
-  private static final String PROTOCOL_VERSION_METHOD_NAME = "getProtocolVersion";
-
-  /**
-   * This method uses
-   * {@link io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion#getPrefix()} to
-   * prefix the file name.
-   * <p>
-   * example:
-   * <p>
-   * filename.json -> v0/filename.json
-   *
-   * @param fileName the original file name
-   * @param protocolVersion supported protocol version
-   * @return filename with protocol version prefix
-   */
-  public static String prefixFileNameByVersion(final String fileName, ProtocolVersion protocolVersion) {
-    return String.format("%s/%s", protocolVersion.getPrefix(), fileName);
-  }
-
-  /**
-   * This method uses reflection to obtain the protocol version method from the provided test context.
-   * <p>
-   * NOTE: The getProtocolVersion method should be public.
-   *
-   * @param context the context in which the current test is being executed.
-   * @return supported protocol version
-   */
-  public static ProtocolVersion getProtocolVersion(ExtensionContext context) throws Exception {
-    Class<?> c = context.getRequiredTestClass();
-    // NOTE: Method should be public
-    Method m = c.getMethod(PROTOCOL_VERSION_METHOD_NAME);
-    return (ProtocolVersion) m.invoke(c.getDeclaredConstructor().newInstance());
-  }
-
-}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.java
deleted file mode 100644
index d39eeb794cb5..000000000000
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- */
-
-package io.airbyte.cdk.integrations.standardtest.destination.comparator;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import java.time.ZoneOffset;
-import java.time.ZonedDateTime;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeParseException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class AdvancedTestDataComparator implements TestDataComparator {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(AdvancedTestDataComparator.class);
-
-  public static final String AIRBYTE_DATE_FORMAT = "yyyy-MM-dd";
-  public static final String AIRBYTE_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss";
-  public static final String AIRBYTE_DATETIME_PARSED_FORMAT = "yyyy-MM-dd HH:mm:ss.S";
-  public static final String AIRBYTE_DATETIME_PARSED_FORMAT_TZ = "yyyy-MM-dd HH:mm:ss XXX";
-  public static final String AIRBYTE_DATETIME_WITH_TZ_FORMAT = "[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d]"
-      + "[[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X][' '][G]]]";
-
-  // TODO revisit dataset which used date as string: exchange_rate_catalog.json
-  // tried to change it to date time type but some connectors failed to store it, e.g.
- // bigquery-denormalized - private static final Set TEST_DATASET_IGNORE_LIST = - Set.of( - "2020-08-29T00:00:00Z", - "2020-08-30T00:00:00Z", - "2020-08-31T00:00:00Z", - "2020-09-01T00:00:00Z", - "2020-09-15T16:58:52.000000Z", - "2020-03-31T00:00:00Z"); - - @Override - public void assertSameData(List expected, List actual) { - LOGGER.info("Expected data {}", expected); - LOGGER.info("Actual data {}", actual); - assertEquals(expected.size(), actual.size()); - final Iterator expectedIterator = expected.iterator(); - final Iterator actualIterator = actual.iterator(); - while (expectedIterator.hasNext() && actualIterator.hasNext()) { - compareObjects(expectedIterator.next(), actualIterator.next()); - } - } - - protected List resolveIdentifier(final String identifier) { - return List.of(identifier); - } - - protected void compareObjects(final JsonNode expectedObject, final JsonNode actualObject) { - if (!areBothEmpty(expectedObject, actualObject)) { - LOGGER.info("Expected Object : {}", expectedObject); - LOGGER.info("Actual Object : {}", actualObject); - final Iterator> expectedDataIterator = expectedObject.fields(); - while (expectedDataIterator.hasNext()) { - final Map.Entry expectedEntry = expectedDataIterator.next(); - final JsonNode expectedValue = expectedEntry.getValue(); - String key = expectedEntry.getKey(); - JsonNode actualValue = ComparatorUtils.getActualValueByExpectedKey(key, actualObject, this::resolveIdentifier); - LOGGER.info("For {} Expected {} vs Actual {}", key, expectedValue, actualValue); - assertSameValue(expectedValue, actualValue); - } - } else { - LOGGER.info("Both rows are empty."); - } - } - - private boolean isJsonNodeEmpty(final JsonNode jsonNode) { - return jsonNode.isEmpty() || (jsonNode.size() == 1 && jsonNode.iterator().next().asText().isEmpty()); - } - - private boolean areBothEmpty(final JsonNode expectedData, final JsonNode actualData) { - return isJsonNodeEmpty(expectedData) && isJsonNodeEmpty(actualData); - } - - // Allows subclasses to implement custom comparison asserts - protected void assertSameValue(final JsonNode expectedValue, final JsonNode actualValue) { - LOGGER.info("assertSameValue : {} vs {}", expectedValue, actualValue); - - assertTrue(compareJsonNodes(expectedValue, actualValue), "Expected value " + expectedValue + " vs Actual value " + actualValue); - } - - protected boolean compareJsonNodes(final JsonNode expectedValue, final JsonNode actualValue) { - if (expectedValue == null || actualValue == null) { - return expectedValue == null && actualValue == null; - } else if (isNumeric(expectedValue.asText())) { - return compareNumericValues(expectedValue.asText(), actualValue.asText()); - } else if (expectedValue.isBoolean()) { - return compareBooleanValues(expectedValue.asText(), actualValue.asText()); - } else if (isDateTimeWithTzValue(expectedValue.asText())) { - return compareDateTimeWithTzValues(expectedValue.asText(), actualValue.asText()); - } else if (isDateTimeValue(expectedValue.asText())) { - return compareDateTimeValues(expectedValue.asText(), actualValue.asText()); - } else if (isDateValue(expectedValue.asText())) { - return compareDateValues(expectedValue.asText(), actualValue.asText()); - } else if (isTimeWithTimezone(expectedValue.asText())) { - return compareTimeWithTimeZone(expectedValue.asText(), actualValue.asText()); - } else if (isTimeWithoutTimezone(expectedValue.asText())) { - return compareTimeWithoutTimeZone(expectedValue.asText(), actualValue.asText()); - } else if (expectedValue.isArray()) { - return 
compareArrays(expectedValue, actualValue); - } else if (expectedValue.isObject()) { - compareObjects(expectedValue, actualValue); - return true; - } else { - LOGGER.warn("Default comparison method!"); - return compareString(expectedValue, actualValue); - } - } - - protected boolean compareString(final JsonNode expectedValue, final JsonNode actualValue) { - return expectedValue.asText().equals(actualValue.asText()); - } - - private boolean isNumeric(final String value) { - return value.matches("-?\\d+(\\.\\d+)?"); - } - - private List getArrayList(final JsonNode jsonArray) { - List result = new ArrayList<>(); - jsonArray.elements().forEachRemaining(result::add); - return result; - } - - protected boolean compareArrays(final JsonNode expectedArray, final JsonNode actualArray) { - var expectedList = getArrayList(expectedArray); - var actualList = getArrayList(actualArray); - - if (expectedList.size() != actualList.size()) { - return false; - } else { - for (JsonNode expectedNode : expectedList) { - var sameActualNode = actualList.stream().filter(actualNode -> compareJsonNodes(expectedNode, actualNode)).findFirst(); - if (sameActualNode.isPresent()) { - actualList.remove(sameActualNode.get()); - } else { - return false; - } - } - return true; - } - } - - protected boolean compareBooleanValues(final String firstBooleanValue, final String secondBooleanValue) { - return Boolean.parseBoolean(firstBooleanValue) == Boolean.parseBoolean(secondBooleanValue); - } - - protected boolean compareNumericValues(final String firstNumericValue, final String secondNumericValue) { - double firstValue = Double.parseDouble(firstNumericValue); - double secondValue = Double.parseDouble(secondNumericValue); - - return firstValue == secondValue; - } - - protected DateTimeFormatter getAirbyteDateTimeWithTzFormatter() { - return DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT); - } - - protected DateTimeFormatter getAirbyteDateTimeParsedWithTzFormatter() { - return DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_PARSED_FORMAT_TZ); - } - - protected boolean isDateTimeWithTzValue(final String value) { - return !TEST_DATASET_IGNORE_LIST.contains(value) && - value.matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$"); - } - - protected ZonedDateTime parseDestinationDateWithTz(final String destinationValue) { - return ZonedDateTime.parse(destinationValue, DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT)).withZoneSameInstant(ZoneOffset.UTC); - } - - protected boolean compareDateTimeWithTzValues(final String airbyteMessageValue, final String destinationValue) { - try { - ZonedDateTime airbyteDate = ZonedDateTime.parse(airbyteMessageValue, getAirbyteDateTimeWithTzFormatter()).withZoneSameInstant(ZoneOffset.UTC); - ZonedDateTime destinationDate = parseDestinationDateWithTz(destinationValue); - return airbyteDate.equals(destinationDate); - } catch (DateTimeParseException e) { - LOGGER.warn("Fail to convert values to ZonedDateTime. Try to compare as text. Airbyte value({}), Destination value ({}). 
Exception: {}", - airbyteMessageValue, destinationValue, e); - return compareTextValues(airbyteMessageValue, destinationValue); - } - } - - protected boolean isDateTimeValue(final String value) { - return value.matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$"); - } - - protected boolean isTimeWithTimezone(final String value) { - return value.matches("^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})$"); - } - - protected boolean isTimeWithoutTimezone(final String value) { - return value.matches("^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$"); - } - - protected boolean compareDateTimeValues(final String airbyteMessageValue, final String destinationValue) { - return compareTextValues(airbyteMessageValue, destinationValue); - } - - protected boolean isDateValue(final String value) { - return value.matches("^\\d{4}-\\d{2}-\\d{2}( BC)?$"); - } - - protected boolean compareDateValues(final String airbyteMessageValue, final String destinationValue) { - return compareTextValues(airbyteMessageValue, destinationValue); - } - - protected boolean compareTimeWithoutTimeZone(final String airbyteMessageValue, final String destinationValue) { - return compareTextValues(airbyteMessageValue, destinationValue); - } - - protected boolean compareTimeWithTimeZone(final String airbyteMessageValue, final String destinationValue) { - return compareTextValues(airbyteMessageValue, destinationValue); - } - - protected boolean compareTextValues(final String firstValue, final String secondValue) { - return firstValue.equals(secondValue); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.java deleted file mode 100644 index 93da63e5aa02..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination.comparator; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.fasterxml.jackson.databind.JsonNode; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.function.Function; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class BasicTestDataComparator implements TestDataComparator { - - private static final Logger LOGGER = LoggerFactory.getLogger(BasicTestDataComparator.class); - - private final Function> nameResolver; - - public BasicTestDataComparator(Function> nameResolver) { - this.nameResolver = nameResolver; - } - - @Override - public void assertSameData(List expected, List actual) { - LOGGER.info("Expected data {}", expected); - LOGGER.info("Actual data {}", actual); - assertEquals(expected.size(), actual.size()); - final Iterator expectedIterator = expected.iterator(); - final Iterator actualIterator = actual.iterator(); - while (expectedIterator.hasNext() && actualIterator.hasNext()) { - final JsonNode expectedData = expectedIterator.next(); - final JsonNode actualData = actualIterator.next(); - final Iterator> expectedDataIterator = expectedData.fields(); - LOGGER.info("Expected row {}", expectedData); - LOGGER.info("Actual row {}", actualData); - assertEquals(expectedData.size(), actualData.size(), "Unequal row size"); - while (expectedDataIterator.hasNext()) { - final Map.Entry expectedEntry = expectedDataIterator.next(); - final JsonNode expectedValue = expectedEntry.getValue(); - String key = expectedEntry.getKey(); - JsonNode actualValue = ComparatorUtils.getActualValueByExpectedKey(key, actualData, nameResolver); - LOGGER.info("For {} Expected {} vs Actual {}", key, expectedValue, actualValue); - assertSameValue(expectedValue, actualValue); - } - } - } - - // Allows subclasses to implement custom comparison asserts - protected void assertSameValue(final JsonNode expectedValue, final JsonNode actualValue) { - assertEquals(expectedValue, actualValue); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.java deleted file mode 100644 index 0b077c5cf1dc..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination.comparator; - -import com.fasterxml.jackson.databind.JsonNode; -import java.util.List; -import java.util.function.Function; - -public class ComparatorUtils { - - public static JsonNode getActualValueByExpectedKey(final String expectedKey, - final JsonNode actualJsonNode, - final Function> nameResolver) { - for (final String actualKey : nameResolver.apply(expectedKey)) { - if (actualJsonNode.has(actualKey)) { - return actualJsonNode.get(actualKey); - } - } - return null; - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.java deleted file mode 100644 index ca5f4a229469..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.java +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination.comparator; - -import com.fasterxml.jackson.databind.JsonNode; -import java.util.List; - -public interface TestDataComparator { - - void assertSameData(final List expected, final List actual); - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java deleted file mode 100644 index fcdc6f7456e0..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping; - -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_ID; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_RAW_ID; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT; -import static io.airbyte.cdk.integrations.base.JavaBaseConstants.LEGACY_RAW_TABLE_COLUMNS; -import static org.jooq.impl.DSL.field; -import static org.jooq.impl.DSL.quotedName; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.JavaBaseConstants; -import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; -import io.airbyte.integrations.base.destination.typing_deduping.BaseSqlGeneratorIntegrationTest; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.List; -import org.jooq.DSLContext; -import org.jooq.DataType; -import org.jooq.Field; -import org.jooq.InsertValuesStepN; -import org.jooq.Name; -import org.jooq.Record; -import org.jooq.SQLDialect; -import org.jooq.conf.ParamType; -import org.jooq.impl.DSL; -import org.jooq.impl.SQLDataType; - -public abstract class JdbcSqlGeneratorIntegrationTest - extends BaseSqlGeneratorIntegrationTest { - - protected abstract JdbcDatabase getDatabase(); - - protected abstract DataType getStructType(); - - // TODO - can we move this class into db_destinations/testFixtures? - // then we could redefine getSqlGenerator() to return a JdbcSqlGenerator - // and this could be a private method getSqlGenerator().getTimestampWithTimeZoneType() - private DataType getTimestampWithTimeZoneType() { - return getSqlGenerator().toDialectType(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE); - } - - @Override - protected abstract JdbcSqlGenerator getSqlGenerator(); - - protected abstract SQLDialect getSqlDialect(); - - private DSLContext getDslContext() { - return DSL.using(getSqlDialect()); - } - - /** - * Many destinations require special handling to create JSON values. For example, redshift requires - * you to invoke JSON_PARSE('{...}'), and postgres requires you to CAST('{...}' AS JSONB). This - * method allows subclasses to implement that logic. - */ - protected abstract Field toJsonValue(String valueAsString); - - private void insertRecords(final Name tableName, - final List columnNames, - final List records, - final String... columnsToParseJson) - throws SQLException { - InsertValuesStepN insert = getDslContext().insertInto( - DSL.table(tableName), - columnNames.stream().map(columnName -> field(quotedName(columnName))).toList()); - for (final JsonNode record : records) { - insert = insert.values( - columnNames.stream() - .map(fieldName -> { - // Convert this field to a string. Pretty naive implementation. 
- final JsonNode column = record.get(fieldName); - final String columnAsString; - if (column == null) { - columnAsString = null; - } else if (column.isTextual()) { - columnAsString = column.asText(); - } else { - columnAsString = column.toString(); - } - - if (Arrays.asList(columnsToParseJson).contains(fieldName)) { - return toJsonValue(columnAsString); - } else { - return DSL.val(columnAsString); - } - }) - .toList()); - } - getDatabase().execute(insert.getSQL(ParamType.INLINED)); - } - - @Override - protected void createNamespace(final String namespace) throws Exception { - getDatabase().execute(getDslContext().createSchemaIfNotExists(namespace).getSQL(ParamType.INLINED)); - } - - @Override - protected void createRawTable(final StreamId streamId) throws Exception { - getDatabase().execute(getDslContext().createTable(DSL.name(streamId.getRawNamespace(), streamId.getRawName())) - .column(COLUMN_NAME_AB_RAW_ID, SQLDataType.VARCHAR(36).nullable(false)) - .column(COLUMN_NAME_AB_EXTRACTED_AT, getTimestampWithTimeZoneType().nullable(false)) - .column(COLUMN_NAME_AB_LOADED_AT, getTimestampWithTimeZoneType()) - .column(COLUMN_NAME_DATA, getStructType().nullable(false)) - .column(COLUMN_NAME_AB_META, getStructType().nullable(true)) - .getSQL(ParamType.INLINED)); - } - - @Override - protected void createV1RawTable(final StreamId v1RawTable) throws Exception { - getDatabase().execute(getDslContext().createTable(DSL.name(v1RawTable.getRawNamespace(), v1RawTable.getRawName())) - .column(COLUMN_NAME_AB_ID, SQLDataType.VARCHAR(36).nullable(false)) - .column(COLUMN_NAME_EMITTED_AT, getTimestampWithTimeZoneType().nullable(false)) - .column(COLUMN_NAME_DATA, getStructType().nullable(false)) - .getSQL(ParamType.INLINED)); - } - - @Override - public void insertRawTableRecords(final StreamId streamId, final List records) throws Exception { - insertRecords( - DSL.name(streamId.getRawNamespace(), streamId.getRawName()), - JavaBaseConstants.V2_RAW_TABLE_COLUMN_NAMES, - records, - COLUMN_NAME_DATA, - COLUMN_NAME_AB_META); - } - - @Override - protected void insertV1RawTableRecords(final StreamId streamId, final List records) throws Exception { - insertRecords( - DSL.name(streamId.getRawNamespace(), streamId.getRawName()), - LEGACY_RAW_TABLE_COLUMNS, - records, - COLUMN_NAME_DATA); - } - - @Override - protected void insertFinalTableRecords(final boolean includeCdcDeletedAt, - final StreamId streamId, - final String suffix, - final List records) - throws Exception { - final List columnNames = - includeCdcDeletedAt ? 
BaseSqlGeneratorIntegrationTest.FINAL_TABLE_COLUMN_NAMES_CDC : BaseSqlGeneratorIntegrationTest.FINAL_TABLE_COLUMN_NAMES; - insertRecords( - DSL.name(streamId.getFinalNamespace(), streamId.getFinalName() + suffix), - columnNames, - records, - COLUMN_NAME_AB_META, "struct", "array", "unknown"); - } - - @Override - protected List dumpRawTableRecords(final StreamId streamId) throws Exception { - return getDatabase() - .queryJsons(getDslContext().selectFrom(DSL.name(streamId.getRawNamespace(), streamId.getRawName())).getSQL(ParamType.INLINED)); - } - - @Override - protected List dumpFinalTableRecords(final StreamId streamId, final String suffix) throws Exception { - return getDatabase() - .queryJsons(getDslContext().selectFrom(DSL.name(streamId.getFinalNamespace(), streamId.getFinalName() + suffix)).getSQL(ParamType.INLINED)); - } - - @Override - protected void teardownNamespace(final String namespace) throws Exception { - getDatabase().execute(getDslContext().dropSchema(namespace).cascade().getSQL(ParamType.INLINED)); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java deleted file mode 100644 index 48b85ec7f0c8..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.cdk.db.JdbcCompatibleSourceOperations; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.JavaBaseConstants; -import io.airbyte.commons.text.Names; -import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import java.util.List; -import javax.sql.DataSource; -import org.jooq.impl.DSL; - -/** - * This class is largely the same as - * {@link io.airbyte.integrations.destination.snowflake.typing_deduping.AbstractSnowflakeTypingDedupingTest}. - * But (a) it uses jooq to construct the sql statements, and (b) it doesn't need to upcase anything. - * At some point we might (?) want to do a refactor to combine them. - */ -public abstract class JdbcTypingDedupingTest extends BaseTypingDedupingTest { - - protected JdbcDatabase database; - private DataSource dataSource; - - /** - * Get the config as declared in GSM (or directly from the testcontainer). This class will do - * further modification to the config to ensure test isolation.i - */ - protected abstract ObjectNode getBaseConfig(); - - protected abstract DataSource getDataSource(JsonNode config); - - /** - * Subclasses may need to return a custom source operations if the default one does not handle - * vendor-specific types correctly. For example, you most likely need to override this method to - * deserialize JSON columns to JsonNode. 
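As an illustration of that override's job, here is a minimal, self-contained sketch of re-parsing a JSON column that a JDBC driver handed back as a plain string. It uses only Jackson; `reparseJsonColumn` is an illustrative name, not part of the `JdbcCompatibleSourceOperations` API.

```kotlin
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.ObjectMapper

private val mapper = ObjectMapper()

// Hypothetical helper: vendor drivers often return JSON/JSONB columns as plain
// strings; a custom source-operations override re-parses them into JsonNode so
// record comparison sees structured data instead of one quoted string.
fun reparseJsonColumn(rawValue: String?): JsonNode? =
    rawValue?.let { mapper.readTree(it) }

fun main() {
    // A JSON object stored in a VARCHAR column comes back as text; re-parse it.
    val node = reparseJsonColumn("""{"a": 1, "b": [2, 3]}""")
    println(node?.get("b")) // prints [2,3]
}
```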
- */ - protected JdbcCompatibleSourceOperations getSourceOperations() { - return JdbcUtils.defaultSourceOperations; - } - - /** - * Subclasses using a config with a nonstandard raw table schema should override this method. - */ - protected String getRawSchema() { - return JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE; - } - - /** - * Subclasses using a config where the default schema is not in the {@code schema} key should - * override this method and {@link #setDefaultSchema(JsonNode, String)}. - */ - protected String getDefaultSchema(final JsonNode config) { - return config.get("schema").asText(); - } - - /** - * Subclasses using a config where the default schema is not in the {@code schema} key should - * override this method and {@link #getDefaultSchema(JsonNode)}. - */ - protected void setDefaultSchema(final JsonNode config, final String schema) { - ((ObjectNode) config).put("schema", schema); - } - - @Override - protected JsonNode generateConfig() { - final JsonNode config = getBaseConfig(); - setDefaultSchema(config, "typing_deduping_default_schema" + getUniqueSuffix()); - dataSource = getDataSource(config); - database = new DefaultJdbcDatabase(dataSource, getSourceOperations()); - return config; - } - - @Override - protected List dumpRawTableRecords(String streamNamespace, final String streamName) throws Exception { - if (streamNamespace == null) { - streamNamespace = getDefaultSchema(getConfig()); - } - final String tableName = StreamId.concatenateRawTableName(streamNamespace, Names.toAlphanumericAndUnderscore(streamName)); - final String schema = getRawSchema(); - return database.queryJsons(DSL.selectFrom(DSL.name(schema, tableName)).getSQL()); - } - - @Override - public List dumpFinalTableRecords(String streamNamespace, final String streamName) throws Exception { - if (streamNamespace == null) { - streamNamespace = getDefaultSchema(getConfig()); - } - return database.queryJsons(DSL.selectFrom(DSL.name(streamNamespace, Names.toAlphanumericAndUnderscore(streamName))).getSQL()); - } - - @Override - protected void teardownStreamAndNamespace(String streamNamespace, final String streamName) throws Exception { - if (streamNamespace == null) { - streamNamespace = getDefaultSchema(getConfig()); - } - database.execute(DSL.dropTableIfExists(DSL.name(getRawSchema(), StreamId.concatenateRawTableName(streamNamespace, streamName))).getSQL()); - database.execute(DSL.dropSchemaIfExists(DSL.name(streamNamespace)).cascade().getSQL()); - } - - @Override - protected void globalTeardown() throws Exception { - DataSourceFactory.close(dataSource); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt new file mode 100644 index 000000000000..8fab95caa21e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt @@ -0,0 +1,2358 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ +package io.airbyte.cdk.integrations.standardtest.destination + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.ObjectNode +import com.google.common.collect.ImmutableMap +import com.google.common.collect.Lists +import com.google.common.collect.Sets +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer +import io.airbyte.cdk.integrations.standardtest.destination.* +import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataArgumentsProvider +import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider +import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import io.airbyte.cdk.integrations.standardtest.destination.comparator.BasicTestDataComparator +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator +import io.airbyte.commons.features.EnvVariableFeatureFlags +import io.airbyte.commons.jackson.MoreMappers +import io.airbyte.commons.json.Jsons +import io.airbyte.commons.lang.Exceptions +import io.airbyte.commons.resources.MoreResources +import io.airbyte.commons.util.MoreIterators +import io.airbyte.configoss.JobGetSpecConfig +import io.airbyte.configoss.OperatorDbt +import io.airbyte.configoss.StandardCheckConnectionInput +import io.airbyte.configoss.StandardCheckConnectionOutput +import io.airbyte.configoss.StandardCheckConnectionOutput.Status +import io.airbyte.configoss.WorkerDestinationConfig +import io.airbyte.protocol.models.Field +import io.airbyte.protocol.models.JsonSchemaType +import io.airbyte.protocol.models.v0.AirbyteCatalog +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.AirbyteMessage.Type +import io.airbyte.protocol.models.v0.AirbyteRecordMessage +import io.airbyte.protocol.models.v0.AirbyteStateMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConnectorSpecification +import io.airbyte.protocol.models.v0.DestinationSyncMode +import io.airbyte.protocol.models.v0.SyncMode +import io.airbyte.workers.exception.TestHarnessException +import io.airbyte.workers.general.DbtTransformationRunner +import io.airbyte.workers.general.DefaultCheckConnectionTestHarness +import io.airbyte.workers.general.DefaultGetSpecTestHarness +import io.airbyte.workers.helper.ConnectorConfigUpdater +import io.airbyte.workers.helper.EntrypointEnvChecker +import io.airbyte.workers.internal.AirbyteDestination +import io.airbyte.workers.internal.DefaultAirbyteDestination +import io.airbyte.workers.normalization.DefaultNormalizationRunner +import io.airbyte.workers.normalization.NormalizationRunner +import io.airbyte.workers.process.AirbyteIntegrationLauncher +import io.airbyte.workers.process.DockerProcessFactory +import io.airbyte.workers.process.ProcessFactory +import java.io.IOException +import java.io.UncheckedIOException +import java.net.URISyntaxException +import java.nio.file.Files +import java.nio.file.Path +import java.time.Instant +import java.util.* +import java.util.concurrent.atomic.AtomicInteger +import java.util.function.Consumer +import java.util.stream.Collectors +import java.util.stream.Stream +import kotlin.Comparator +import kotlin.collections.ArrayList +import kotlin.collections.HashSet +import org.junit.jupiter.api.* +import org.junit.jupiter.api.extension.ExtensionContext 
+import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.ArgumentsProvider +import org.junit.jupiter.params.provider.ArgumentsSource +import org.mockito.Mockito +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +abstract class DestinationAcceptanceTest { + protected lateinit var TEST_SCHEMAS: HashSet + + private lateinit var testEnv: TestDestinationEnv + + private var jobRoot: Path? = null + private var processFactory: ProcessFactory? = null + private var mConnectorConfigUpdater: ConnectorConfigUpdater? = null + + protected var localRoot: Path? = null + open protected var _testDataComparator: TestDataComparator = getTestDataComparator() + + open fun getTestDataComparator(): TestDataComparator { + return BasicTestDataComparator { this.resolveIdentifier(it) } + } + + protected abstract val imageName: String + /** + * Name of the docker image that the tests will run against. + * + * @return docker image name + */ + get + + protected fun supportsInDestinationNormalization(): Boolean { + return false + } + + protected fun inDestinationNormalizationFlags(shouldNormalize: Boolean): Map { + if (shouldNormalize && supportsInDestinationNormalization()) { + return java.util.Map.of("NORMALIZATION_TECHNIQUE", "LEGACY") + } + return emptyMap() + } + + private val imageNameWithoutTag: String + get() = + if (imageName.contains(":")) + imageName.split(":".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()[0] + else imageName + + private fun readMetadata(): JsonNode { + return try { + Jsons.jsonNodeFromFile(MoreResources.readResourceAsFile("metadata.yaml")) + } catch (e: IllegalArgumentException) { + // Resource is not found. + Jsons.emptyObject() + } catch (e: URISyntaxException) { + Jsons.emptyObject() + } catch (e: IOException) { + throw UncheckedIOException(e) + } + } + + protected val normalizationImageName: String? + get() { + val metadata = readMetadata()["data"] ?: return null + val normalizationConfig = metadata["normalizationConfig"] ?: return null + val normalizationRepository = + normalizationConfig["normalizationRepository"] ?: return null + return normalizationRepository.asText() + ":" + NORMALIZATION_VERSION + } + + /** + * Configuration specific to the integration. Will be passed to integration where appropriate in + * each test. Should be valid. + * + * @return integration-specific configuration + */ + @Throws(Exception::class) protected abstract fun getConfig(): JsonNode + + /** + * Configuration specific to the integration. Will be passed to integration where appropriate in + * tests that test behavior when configuration is invalid. e.g incorrect password. Should be + * invalid. + * + * @return integration-specific configuration + */ + @Throws(Exception::class) protected abstract fun getFailCheckConfig(): JsonNode? + + /** + * Function that returns all of the records in destination as json at the time this method is + * invoked. These will be used to check that the data actually written is what should actually + * be there. Note: this returns a set and does not test any order guarantees. + * + * @param testEnv + * - information about the test environment. + * @param streamName + * - name of the stream for which we are retrieving records. + * @param namespace + * - the destination namespace records are located in. Null if not applicable. Usually a JDBC + * schema. + * @param streamSchema + * - schema of the stream to be retrieved. 
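As the description continues below, the stream schema exists because some destinations (CSV, for example) return every value as a string. A hedged sketch of the coercion a test then has to perform, using plain Jackson; `coerce` is an illustrative name, not framework API.

```kotlin
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.JsonNodeFactory

// Illustrative sketch: map a raw text value back to a typed JsonNode using the
// declared JSON schema type, so comparisons see 10.1 rather than "10.1".
fun coerce(raw: String, jsonSchemaType: String): JsonNode = when (jsonSchemaType) {
    "integer" -> JsonNodeFactory.instance.numberNode(raw.toLong())
    "number" -> JsonNodeFactory.instance.numberNode(raw.toBigDecimal())
    "boolean" -> JsonNodeFactory.instance.booleanNode(raw.toBoolean())
    else -> JsonNodeFactory.instance.textNode(raw)
}

fun main() {
    println(coerce("10.1", "number")) // numeric node 10.1
    println(coerce("10.1", "string")) // text node "10.1"
}
```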
This is only necessary for destinations in which data + * types cannot be accurately inferred (e.g. in CSV destination, every value is a string). + * @return All of the records in the destination at the time this method is invoked. + * @throws Exception + * - can throw any exception, test framework will handle. + */ + @Throws(Exception::class) + protected abstract fun retrieveRecords( + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, + streamSchema: JsonNode + ): List + + /** + * Returns a destination's default schema. The default implementation assumes this corresponds + * to the configuration's 'schema' field, as this is how most of our destinations implement + * this. Destinations are free to appropriately override this. The return value is used to + * assert correctness. + * + * If not applicable, Destinations are free to ignore this. + * + * @param config + * - integration-specific configuration returned by [.getConfig]. + * @return the default schema, if applicatble. + */ + @Throws(Exception::class) + protected open fun getDefaultSchema(config: JsonNode): String? { + if (config["schema"] == null) { + return null + } + val schema = config["schema"].asText() + TEST_SCHEMAS!!.add(schema) + return schema + } + + /** + * Override to return true if a destination implements namespaces and should be tested as such. + */ + protected fun implementsNamespaces(): Boolean { + return false + } + + /** + * Detects if a destination implements append mode from the spec.json that should include + * 'supportsIncremental' = true + * + * @return + * - a boolean. + */ + @Throws(TestHarnessException::class) + protected fun implementsAppend(): Boolean { + val spec = runSpec() + Assertions.assertNotNull(spec) + return if (spec.supportsIncremental != null) { + spec.supportsIncremental + } else { + false + } + } + + protected fun normalizationFromDefinition(): Boolean { + val metadata = readMetadata()["data"] ?: return false + val normalizationConfig = metadata["normalizationConfig"] ?: return false + return normalizationConfig.has("normalizationRepository") && + normalizationConfig.has("normalizationTag") + } + + protected fun dbtFromDefinition(): Boolean { + val metadata = readMetadata()["data"] ?: return false + val supportsDbt = metadata["supportsDbt"] + return supportsDbt != null && supportsDbt.asBoolean(false) + } + + protected val destinationDefinitionKey: String + get() = imageNameWithoutTag + + protected val normalizationIntegrationType: String? + get() { + val metadata = readMetadata()["data"] ?: return null + val normalizationConfig = metadata["normalizationConfig"] ?: return null + val normalizationIntegrationType = + normalizationConfig["normalizationIntegrationType"] ?: return null + return normalizationIntegrationType.asText() + } + + /** + * Detects if a destination implements append dedup mode from the spec.json that should include + * 'supportedDestinationSyncMode' + * + * @return + * - a boolean. + */ + @Throws(TestHarnessException::class) + protected fun implementsAppendDedup(): Boolean { + val spec = runSpec() + Assertions.assertNotNull(spec) + return if (spec.supportedDestinationSyncModes != null) { + spec.supportedDestinationSyncModes.contains(DestinationSyncMode.APPEND_DEDUP) + } else { + false + } + } + + /** + * Detects if a destination implements overwrite mode from the spec.json that should include + * 'supportedDestinationSyncMode' + * + * @return + * - a boolean. 
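The capability checks above (`implementsAppend`, `implementsAppendDedup`, `implementsOverwrite`) all reduce to reading optional fields out of the connector spec. A minimal sketch of that pattern with plain Jackson, assuming the spec is available as JSON:

```kotlin
import com.fasterxml.jackson.databind.ObjectMapper

// Illustrative only: the real tests read these fields off a ConnectorSpecification
// object; the underlying logic is just optional-field lookups like this.
fun main() {
    val spec = ObjectMapper().readTree(
        """{"supportsIncremental": true,
            "supportedDestinationSyncModes": ["append", "overwrite"]}"""
    )
    val supportsIncremental = spec.path("supportsIncremental").asBoolean(false)
    val supportsOverwrite = spec.path("supportedDestinationSyncModes")
        .any { it.asText() == "overwrite" }
    println("append=$supportsIncremental overwrite=$supportsOverwrite")
}
```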
+ */ + @Throws(TestHarnessException::class) + protected fun implementsOverwrite(): Boolean { + val spec = runSpec() + Assertions.assertNotNull(spec) + return if (spec.supportedDestinationSyncModes != null) { + spec.supportedDestinationSyncModes.contains(DestinationSyncMode.OVERWRITE) + } else { + false + } + } + + /** + * Same idea as [.retrieveRecords]. Except this method should pull records from the table that + * contains the normalized records and convert them back into the data as it would appear in an + * [AirbyteRecordMessage]. Only need to override this method if [.normalizationFromDefinition] + * returns true. + * + * @param testEnv + * - information about the test environment. + * @param streamName + * - name of the stream for which we are retrieving records. + * @param namespace + * - the destination namespace records are located in. Null if not applicable. Usually a JDBC + * schema. + * @return All of the records in the destination at the time this method is invoked. + * @throws Exception + * - can throw any exception, test framework will handle. + */ + @Throws(Exception::class) + protected fun retrieveNormalizedRecords( + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String? + ): List { + throw IllegalStateException("Not implemented") + } + + /** + * Function that performs any setup of external resources required for the test. e.g. + * instantiate a postgres database. This function will be called before EACH test. + * + * @param testEnv + * - information about the test environment. + * @param TEST_SCHEMAS + * @throws Exception + * - can throw any exception, test framework will handle. + */ + @Throws(Exception::class) + protected abstract fun setup(testEnv: TestDestinationEnv, TEST_SCHEMAS: HashSet) + + /** + * Function that performs any clean up of external resources required for the test. e.g. delete + * a postgres database. This function will be called after EACH test. It MUST remove all data in + * the destination so that there is no contamination across tests. + * + * @param testEnv + * - information about the test environment. + * @throws Exception + * - can throw any exception, test framework will handle. + */ + @Throws(Exception::class) protected abstract fun tearDown(testEnv: TestDestinationEnv) + + @Deprecated( + """This method is moved to the AdvancedTestDataComparator. Please move your destination + implementation of the method to your comparator implementation.""" + ) + protected fun resolveIdentifier(identifier: String?): List { + return java.util.List.of(identifier) + } + + @BeforeEach + @Throws(Exception::class) + fun setUpInternal() { + val testDir = Path.of("/tmp/airbyte_tests/") + Files.createDirectories(testDir) + val workspaceRoot = Files.createTempDirectory(testDir, "test") + jobRoot = Files.createDirectories(Path.of(workspaceRoot.toString(), "job")) + localRoot = Files.createTempDirectory(testDir, "output") + LOGGER.info("jobRoot: {}", jobRoot) + LOGGER.info("localRoot: {}", localRoot) + testEnv = TestDestinationEnv(localRoot) + mConnectorConfigUpdater = Mockito.mock(ConnectorConfigUpdater::class.java) + TEST_SCHEMAS = HashSet() + setup(testEnv, TEST_SCHEMAS) + + processFactory = + DockerProcessFactory( + workspaceRoot, + workspaceRoot.toString(), + localRoot.toString(), + "host", + emptyMap() + ) + } + + @AfterEach + @Throws(Exception::class) + fun tearDownInternal() { + tearDown(testEnv) + } + + /** Verify that when the integrations returns a valid spec. 
*/ + @Test + @Throws(TestHarnessException::class) + fun testGetSpec() { + Assertions.assertNotNull(runSpec()) + } + + /** + * Verify that when given valid credentials, that check connection returns a success response. + * Assume that the [DestinationAcceptanceTest.getConfig] is valid. + */ + @Test + @Throws(Exception::class) + fun testCheckConnection() { + Assertions.assertEquals( + StandardCheckConnectionOutput.Status.SUCCEEDED, + runCheck(getConfig()).status + ) + } + + /** + * Verify that when given invalid credentials, that check connection returns a failed response. + * Assume that the [DestinationAcceptanceTest.getFailCheckConfig] is invalid. + */ + @Test + @Throws(Exception::class) + fun testCheckConnectionInvalidCredentials() { + Assertions.assertEquals( + StandardCheckConnectionOutput.Status.FAILED, + runCheck(getFailCheckConfig()).status + ) + } + + /** + * Verify that the integration successfully writes records. Tests a wide variety of messages and + * schemas (aspirationally, anyway). + */ + @ParameterizedTest + @ArgumentsSource(DataArgumentsProvider::class) + @Throws(Exception::class) + fun testSync(messagesFilename: String?, catalogFilename: String?) { + val catalog = + Jsons.deserialize( + MoreResources.readResource(catalogFilename), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + val messages: List = + MoreResources.readResource(messagesFilename) + .lines() + .map { + Jsons.deserialize(it, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) + } + .toList() + + val config = getConfig() + val defaultSchema = getDefaultSchema(config) + runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) + retrieveRawRecordsAndAssertSameMessages(catalog, messages, defaultSchema) + } + + /** + * This serves to test MSSQL 2100 limit parameters in a single query. this means that for + * Airbyte insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 + * params + */ + @ParameterizedTest + @ArgumentsSource(DataArgumentsProvider::class) + @Throws(Exception::class) + fun testSyncWithLargeRecordBatch(messagesFilename: String?, catalogFilename: String?) { + val catalog = + Jsons.deserialize( + MoreResources.readResource(catalogFilename), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + val messages: List = + MoreResources.readResource(messagesFilename) + .lines() + .map { + Jsons.deserialize(it, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) + } + .toList() + + val largeNumberRecords = + Collections.nCopies(400, messages) + .stream() + .flatMap { obj: List -> + obj.stream() + } // regroup messages per stream + .sorted( + Comparator.comparing { obj: io.airbyte.protocol.models.v0.AirbyteMessage -> + obj.type + } + .thenComparing { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + if ( + message.type == + io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD + ) + message.record.stream + else message.toString() + } + ) + .collect(Collectors.toList()) + + val config = getConfig() + runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false) + } + + /** Verify that the integration overwrites the first sync with the second sync. 
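As a hedged model of the contract this test enforces: an OVERWRITE sync replaces only the target stream's data, while records belonging to other streams (the dummy stream created below) must survive untouched. `Rec` and `overwrite` are illustrative stand-ins, not harness types.

```kotlin
data class Rec(val stream: String, val id: Int)

// Keep every record from other streams; replace the target stream's records.
fun overwrite(existing: List<Rec>, incoming: List<Rec>, stream: String): List<Rec> =
    existing.filter { it.stream != stream } + incoming

fun main() {
    val first = listOf(Rec("exchange_rate", 1), Rec("exchange_rate", 2), Rec("dummy", 9))
    val second = listOf(Rec("exchange_rate", 3))
    // The "dummy" record must survive; "exchange_rate" is fully replaced.
    println(overwrite(first, second, "exchange_rate"))
    // [Rec(stream=dummy, id=9), Rec(stream=exchange_rate, id=3)]
}
```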
*/ + @Test + @Throws(Exception::class) + fun testSecondSync() { + if (!implementsOverwrite()) { + LOGGER.info("Destination's spec.json does not support overwrite sync mode.") + return + } + + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + + val firstSyncMessages: List = + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { + Jsons.deserialize( + it, + io.airbyte.protocol.models.v0.AirbyteMessage::class.java + ) + } + .toList() + val config = getConfig() + runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false) + + // We need to make sure that other streams\tables\files in the same location will not be + // affected\deleted\overridden by our activities during first, second or any future sync. + // So let's create a dummy data that will be checked after all sync. It should remain the + // same + val dummyCatalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + dummyCatalog.streams[0].name = DUMMY_CATALOG_NAME + val configuredDummyCatalog = CatalogHelpers.toDefaultConfiguredCatalog(dummyCatalog) + // update messages to set new dummy stream name + firstSyncMessages + .stream() + .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + message.record != null + } + .forEach { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + message.record.stream = DUMMY_CATALOG_NAME + } + // sync dummy data + runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredDummyCatalog, false) + + // Run second sync + val secondSyncMessages: List = + Lists.newArrayList( + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(catalog.streams[0].name) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() + .put("id", 1) + .put("currency", "USD") + .put( + "date", + "2020-03-31T00:00:00Z" + ) // TODO(sherifnada) hack: write decimals with sigfigs + // because Snowflake stores 10.1 as "10" which + // fails destination tests + .put("HKD", 10.1) + .put("NZD", 700.1) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))) + ) + ) + + runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false) + val defaultSchema = getDefaultSchema(config) + retrieveRawRecordsAndAssertSameMessages(catalog, secondSyncMessages, defaultSchema) + + // verify that other streams in the same location were not affected. If something fails + // here, + // then this need to be fixed in connectors logic to override only required streams + retrieveRawRecordsAndAssertSameMessages(dummyCatalog, firstSyncMessages, defaultSchema) + } + + /** + * Tests that we are able to read over special characters properly when processing line breaks + * in destinations. 
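The tricky inputs this test feeds through include a Unicode LINE SEPARATOR (U+2028) and embedded `\n`/`\r`, which line-oriented writers tend to mangle. A small self-check of those exact values, illustrative only:

```kotlin
// Sketch: the record below mirrors the test data; a destination must round-trip
// these characters byte-for-byte rather than splitting or stripping them.
fun main() {
    val currency = "USD\u2028"               // trailing Unicode line separator
    val date = "2020-03-\n31T00:00:00Z\r"    // embedded newline and carriage return
    check(currency.length == 4 && currency.last() == '\u2028')
    check(date.contains('\n') && date.endsWith('\r'))
    println("special characters preserved in-memory: ok")
}
```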
+ */ + @Test + @Throws(Exception::class) + fun testLineBreakCharacters() { + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + val config = getConfig() + + val secondSyncMessages: List = + Lists.newArrayList( + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(catalog.streams[0].name) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() + .put("id", 1) + .put("currency", "USD\u2028") + .put( + "date", + "2020-03-\n31T00:00:00Z\r" + ) // TODO(sherifnada) hack: write decimals with sigfigs + // because Snowflake stores 10.1 as "10" which + // fails destination tests + .put("HKD", 10.1) + .put("NZD", 700.1) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))) + ) + ) + + runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false) + val defaultSchema = getDefaultSchema(config) + retrieveRawRecordsAndAssertSameMessages(catalog, secondSyncMessages, defaultSchema) + } + + @Test + fun normalizationFromDefinitionValueShouldBeCorrect() { + if (normalizationFromDefinition()) { + var normalizationRunnerFactorySupportsDestinationImage: Boolean + try { + DefaultNormalizationRunner( + processFactory, + normalizationImageName, + normalizationIntegrationType + ) + normalizationRunnerFactorySupportsDestinationImage = true + } catch (e: IllegalStateException) { + normalizationRunnerFactorySupportsDestinationImage = false + } + Assertions.assertEquals( + normalizationFromDefinition(), + normalizationRunnerFactorySupportsDestinationImage + ) + } + } + + /** + * Verify that the integration successfully writes records incrementally. The second run should + * append records to the datastore instead of overwriting the previous run. 
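A sketch of the APPEND expectation, in contrast to the overwrite model above: the second sync's records are added on top of the first sync's, so the destination should hold the union. This is an illustrative stand-in, not harness code.

```kotlin
fun main() {
    val firstSync = listOf("rec-1", "rec-2")
    val secondSync = listOf("rec-3")
    // What retrieveRecords should return after two APPEND syncs: both batches.
    val expected = firstSync + secondSync
    check(expected == listOf("rec-1", "rec-2", "rec-3"))
    println(expected)
}
```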
+ */ + @Test + @Throws(Exception::class) + fun testIncrementalSync() { + if (!implementsAppend()) { + LOGGER.info("Destination's spec.json does not include '\"supportsIncremental\" ; true'") + return + } + + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + configuredCatalog.streams.forEach { s -> + s.withSyncMode(SyncMode.INCREMENTAL) + s.withDestinationSyncMode(DestinationSyncMode.APPEND) + } + + val firstSyncMessages: List = + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + .toList() + val config = getConfig() + runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false) + val secondSyncMessages: List = + Lists.newArrayList( + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(catalog.streams[0].name) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() + .put("id", 1) + .put("currency", "USD") + .put( + "date", + "2020-03-31T00:00:00Z" + ) // TODO(sherifnada) hack: write decimals with sigfigs + // because Snowflake stores 10.1 as "10" which + // fails destination tests + .put("HKD", 10.1) + .put("NZD", 700.1) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))) + ) + ) + runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false) + + val expectedMessagesAfterSecondSync: + MutableList = + ArrayList() + expectedMessagesAfterSecondSync.addAll(firstSyncMessages) + expectedMessagesAfterSecondSync.addAll(secondSyncMessages) + + val defaultSchema = getDefaultSchema(config) + retrieveRawRecordsAndAssertSameMessages( + catalog, + expectedMessagesAfterSecondSync, + defaultSchema + ) + } + + @ArgumentsSource(DataArgumentsProvider::class) + @Test + @Throws(Exception::class) + fun testIncrementalSyncWithNormalizationDropOneColumn() { + if (!normalizationFromDefinition() || !supportIncrementalSchemaChanges()) { + return + } + + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + ProtocolVersion.V0 + ) + ), + AirbyteCatalog::class.java + ) + + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + configuredCatalog.streams.forEach { s -> + s.withSyncMode(SyncMode.INCREMENTAL) + s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) + s.withCursorField(emptyList()) + // use composite primary key of various types (string, float) + s.withPrimaryKey( + java.util.List.of( + listOf("id"), + listOf("currency"), + listOf("date"), + listOf("NZD"), + listOf("USD") + ) + ) + } + + var messages: List = + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + ProtocolVersion.V0 + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + + val config = getConfig() + runSyncAndVerifyStateOutput(config, messages, configuredCatalog, 
true) + + val defaultSchema = getDefaultSchema(config) + var actualMessages = retrieveNormalizedRecords(catalog, defaultSchema) + assertSameMessages(messages, actualMessages, true) + + // remove one field + val jsonSchema = configuredCatalog.streams[0].stream.jsonSchema + (jsonSchema.findValue("properties") as ObjectNode).remove("HKD") + // insert more messages + // NOTE: we re-read the messages because `assertSameMessages` above pruned the emittedAt + // timestamps. + messages = + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + ProtocolVersion.V0 + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + messages.addLast( + Jsons.deserialize( + "{\"type\": \"RECORD\", \"record\": {\"stream\": \"exchange_rate\", \"emitted_at\": 1602637989500, \"data\": { \"id\": 2, \"currency\": \"EUR\", \"date\": \"2020-09-02T00:00:00Z\", \"NZD\": 1.14, \"USD\": 10.16}}}\n", + io.airbyte.protocol.models.v0.AirbyteMessage::class.java + ) + ) + + runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true) + + // assert the removed field is missing on the new messages + actualMessages = retrieveNormalizedRecords(catalog, defaultSchema) + + // We expect all the of messages to be missing the removed column after normalization. + val expectedMessages = + messages + .stream() + .map { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + if (message.record != null) { + (message.record.data as ObjectNode).remove("HKD") + } + message + } + .collect(Collectors.toList()) + assertSameMessages(expectedMessages, actualMessages, true) + } + + /** + * Verify that the integration successfully writes records successfully both raw and normalized. + * Tests a wide variety of messages an schemas (aspirationally, anyway). + */ + @ParameterizedTest + @ArgumentsSource(DataArgumentsProvider::class) + @Throws(Exception::class) + fun testSyncWithNormalization(messagesFilename: String?, catalogFilename: String?) { + if (!normalizationFromDefinition()) { + return + } + + val catalog = + Jsons.deserialize( + MoreResources.readResource(catalogFilename), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + val messages = + MoreResources.readResource(messagesFilename).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } + + val config = getConfig() + runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true) + + val defaultSchema = getDefaultSchema(config) + val actualMessages = retrieveNormalizedRecords(catalog, defaultSchema) + assertSameMessages(messages, actualMessages, true) + } + + /** + * Verify that the integration successfully writes records successfully both raw and normalized + * and run dedupe transformations. + * + * Although this test assumes append-dedup requires normalization, and almost all our + * Destinations do so, this is not necessarily true. This explains [.implementsAppendDedup]. 
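A minimal model of the APPEND_DEDUP semantics the test below verifies: group records by the composite primary key and keep the one with the greatest `emittedAt`. `Row` and `dedupe` are illustrative stand-ins mirroring the expectation the test body computes.

```kotlin
data class Row(val id: Int, val currency: String, val date: String, val nzd: Double, val emittedAt: Long)

// Group by the composite key (id, currency, date, NZD); the latest record wins.
fun dedupe(records: List<Row>): List<Row> =
    records
        .groupBy { listOf(it.id, it.currency, it.date, it.nzd) }
        .values
        .map { group -> group.maxByOrNull { it.emittedAt }!! }

fun main() {
    val recs = listOf(
        Row(1, "USD", "2020-09-01T00:00:00Z", 1.14, 100),
        Row(1, "USD", "2020-09-01T00:00:00Z", 1.14, 200), // later duplicate wins
        Row(2, "EUR", "2020-09-01T00:00:00Z", 1.14, 100),
    )
    println(dedupe(recs).map { it.emittedAt }) // [200, 100]
}
```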
+ */ + @Test + @Throws(Exception::class) + fun testIncrementalDedupeSync() { + if (!implementsAppendDedup()) { + LOGGER.info( + "Destination's spec.json does not include 'append_dedupe' in its '\"supportedDestinationSyncModes\"'" + ) + return + } + + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + configuredCatalog.streams.forEach { s -> + s.withSyncMode(SyncMode.INCREMENTAL) + s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) + s.withCursorField(emptyList()) + // use composite primary key of various types (string, float) + s.withPrimaryKey( + java.util.List.of(listOf("id"), listOf("currency"), listOf("date"), listOf("NZD")) + ) + } + + val firstSyncMessages = + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + val config = getConfig() + runSyncAndVerifyStateOutput( + config, + firstSyncMessages, + configuredCatalog, + supportsNormalization() + ) + + val secondSyncMessages: List = + Lists.newArrayList( + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(catalog.streams[0].name) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() + .put("id", 2) + .put("currency", "EUR") + .put("date", "2020-09-01T00:00:00Z") + .put("HKD", 10.5) + .put("NZD", 1.14) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(catalog.streams[0].name) + .withEmittedAt(Instant.now().toEpochMilli() + 100L) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() + .put("id", 1) + .put("currency", "USD") + .put("date", "2020-09-01T00:00:00Z") + .put("HKD", 5.4) + .put("NZD", 1.14) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))) + ) + ) + runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false) + + val expectedMessagesAfterSecondSync: + MutableList = + ArrayList() + expectedMessagesAfterSecondSync.addAll(firstSyncMessages) + expectedMessagesAfterSecondSync.addAll(secondSyncMessages) + + val latestMessagesOnly = + expectedMessagesAfterSecondSync + .filter { it.type == Type.RECORD && it.record != null } + .groupBy { + it.record.data["id"].asText() + + it.record.data["currency"].asText() + + it.record.data["date"].asText() + + it.record.data["NZD"].asText() + } + .mapValues { it.value.maxBy { it.record.emittedAt } } + // Filter expectedMessagesAfterSecondSync and keep latest messages only (keep same message + // order) + val expectedMessages = + expectedMessagesAfterSecondSync + .stream() + .filter { it.type == Type.RECORD && it.record != null } + .filter { + val key = + it.record.data["id"].asText() + + it.record.data["currency"].asText() + + it.record.data["date"].asText() + + it.record.data["NZD"].asText() + (it.record.emittedAt == latestMessagesOnly[key]!!.record.emittedAt) + } + 
.collect(Collectors.toList())
+
+        val defaultSchema = getDefaultSchema(config)
+        retrieveRawRecordsAndAssertSameMessages(
+            catalog,
+            expectedMessagesAfterSecondSync,
+            defaultSchema
+        )
+        if (normalizationFromDefinition()) {
+            val actualMessages = retrieveNormalizedRecords(catalog, defaultSchema)
+            assertSameMessages(expectedMessages, actualMessages, true)
+        }
+    }
+
+    protected val maxRecordValueLimit: Int
+        /** @return the max limit length allowed for values in the destination. */
+        get() = 1000000000
+
+    @Test
+    @Throws(Exception::class)
+    fun testCustomDbtTransformations() {
+        if (!dbtFromDefinition()) {
+            return
+        }
+
+        val config = getConfig()
+
+        // This may throw IllegalStateException "Requesting normalization, but it is not included
+        // in the normalization mappings".
+        // We indeed require a normalization implementation of the 'transform_config' function for
+        // this destination, because we make sure to install the required dbt dependency in the
+        // normalization docker image in order to run this test successfully, and we need it to
+        // convert a destination 'config.json' into a dbt 'profiles.yml'
+        // (we don't actually rely on normalization running anything else here though).
+        val runner =
+            DbtTransformationRunner(
+                processFactory,
+                DefaultNormalizationRunner(
+                    processFactory,
+                    normalizationImageName,
+                    normalizationIntegrationType
+                )
+            )
+        runner.start()
+        val transformationRoot = Files.createDirectories(jobRoot!!.resolve("transform"))
+        val dbtConfig =
+            OperatorDbt() // Forked from https://github.com/dbt-labs/jaffle_shop because they made
+                // a change that would have required a dbt version upgrade
+                // https://github.com/dbt-labs/jaffle_shop/commit/b1680f3278437c081c735b7ea71c2ff9707bc75f#diff-27386df54b2629c1191d8342d3725ed8678413cfa13b5556f59d69d33fae5425R20
+                // We're actually two commits upstream of that, because the previous commit
+                // (https://github.com/dbt-labs/jaffle_shop/commit/ec36ae177ab5cb79da39ff8ab068c878fbac13a0)
+                // also breaks something
+                // TODO once we're on DBT 1.x, switch this back to using the main branch
+                .withGitRepoUrl("https://github.com/airbytehq/jaffle_shop.git")
+                .withGitRepoBranch("pre_dbt_upgrade")
+                .withDockerImage(normalizationImageName)
+        //
+        // jaffle_shop is a fictional ecommerce store maintained by fishtownanalytics/dbt.
+        //
+        // This dbt project transforms raw data from an app database into a customers and orders
+        // model ready for analytics.
+        // The repo is a self-contained playground dbt project, useful for testing out scripts, and
+        // communicating some of the core dbt concepts:
+        //
+        // 1. First, it tests if the connection to the destination works.
+        dbtConfig.withDbtArguments("debug")
+        if (!runner.run(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt debug Failed.")
+        }
+        // 2. Install dependency packages, if any
+        dbtConfig.withDbtArguments("deps")
+        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt deps Failed.")
+        }
+        // 3. It contains seeds that include some (fake) raw data from a fictional app as CSV data
+        // sets.
+        // This materializes the CSVs as tables in your target schema.
+        // Note that a typical dbt project does not require this step since dbt assumes your raw
+        // data is already in your warehouse.
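+        // (Steps 3 to 6 below reuse the pattern established above: point dbtConfig at the next
+        // dbt command, then invoke the runner, treating a false return as a failed dbt
+        // invocation.)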
+        dbtConfig.withDbtArguments("seed")
+        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt seed Failed.")
+        }
+        // 4. Run the models:
+        // Note: If this step fails, it might mean that you need to make small changes to the SQL
+        // in the models folder to adjust for your target database's flavor of SQL.
+        dbtConfig.withDbtArguments("run")
+        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt run Failed.")
+        }
+        // 5. Test that the output of the models and tables have been properly populated:
+        dbtConfig.withDbtArguments("test")
+        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt test Failed.")
+        }
+        // 6. Generate dbt documentation for the project:
+        // This step is commented out because it takes a long time, but is not vital for Airbyte
+        // dbtConfig.withDbtArguments("docs generate");
+        // if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig))
+        // {
+        // throw new WorkerException("dbt docs generate Failed.");
+        // }
+        runner.close()
+    }
+
+    @Test
+    @Throws(Exception::class)
+    fun testCustomDbtTransformationsFailure() {
+        if (!normalizationFromDefinition() || !dbtFromDefinition()) {
+            // we require a normalization implementation for this destination, because we make sure
+            // to install the required dbt dependency in the normalization docker image in order to
+            // run this test successfully
+            // (we don't actually rely on normalization running anything here though)
+            return
+        }
+
+        val config = getConfig()
+
+        val runner =
+            DbtTransformationRunner(
+                processFactory,
+                DefaultNormalizationRunner(
+                    processFactory,
+                    normalizationImageName,
+                    normalizationIntegrationType
+                )
+            )
+        runner.start()
+        val transformationRoot = Files.createDirectories(jobRoot!!.resolve("transform"))
+        val dbtConfig =
+            OperatorDbt()
+                .withGitRepoUrl("https://github.com/fishtown-analytics/dbt-learn-demo.git")
+                .withGitRepoBranch("main")
+                .withDockerImage("fishtownanalytics/dbt:0.19.1")
+                .withDbtArguments("debug")
+        if (!runner.run(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+            throw TestHarnessException("dbt debug Failed.")
+        }
+
+        dbtConfig.withDbtArguments("test")
+        Assertions.assertFalse(
+            runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig),
+            "dbt test should fail, as we haven't run dbt run on this project yet"
+        )
+    }
+
+    /** Verify the destination uses the namespace field if it is set. */
+    @Test
+    @Throws(Exception::class)
+    fun testSyncUsesAirbyteStreamNamespaceIfNotNull() {
+        if (!implementsNamespaces()) {
+            return
+        }
+
+        // TODO(davin): make these tests part of the catalog file.
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        // A unique namespace is required to avoid test isolation problems.
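+        // For illustration (hypothetical value): TestingNamespaces.generate("source_namespace")
+        // yields something like "source_namespace_test_20240101_abcde", a dated, randomized schema
+        // name that stale-namespace cleanup can later recognize and delete.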
+ val namespace = TestingNamespaces.generate("source_namespace") + TEST_SCHEMAS!!.add(namespace) + + catalog.streams.forEach(Consumer { stream: AirbyteStream -> stream.namespace = namespace }) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + + val messages = + MoreResources.readResource( + DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + val messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespace) + + val config = getConfig() + val defaultSchema = getDefaultSchema(config) + runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false) + retrieveRawRecordsAndAssertSameMessages(catalog, messagesWithNewNamespace, defaultSchema) + } + + /** Verify a destination is able to write tables with the same name to different namespaces. */ + @Test + @Throws(Exception::class) + fun testSyncWriteSameTableNameDifferentNamespace() { + if (!implementsNamespaces()) { + return + } + + // TODO(davin): make these tests part of the catalog file. + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val namespace1 = TestingNamespaces.generate("source_namespace") + TEST_SCHEMAS!!.add(namespace1) + catalog.streams.forEach(Consumer { stream: AirbyteStream -> stream.namespace = namespace1 }) + + val diffNamespaceStreams = ArrayList() + val namespace2 = TestingNamespaces.generate("diff_source_namespace") + TEST_SCHEMAS!!.add(namespace2) + val mapper = MoreMappers.initMapper() + for (stream in catalog.streams) { + val clonedStream = + mapper.readValue(mapper.writeValueAsString(stream), AirbyteStream::class.java) + clonedStream.namespace = namespace2 + diffNamespaceStreams.add(clonedStream) + } + catalog.streams.addAll(diffNamespaceStreams) + + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + val messageFile: String = + DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion()) + val ns1Messages = + MoreResources.readResource(messageFile).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } + val ns1MessagesAtNamespace1 = getRecordMessagesWithNewNamespace(ns1Messages, namespace1) + val ns2Messages: List = + MoreResources.readResource(messageFile).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } + val ns2MessagesAtNamespace2 = getRecordMessagesWithNewNamespace(ns2Messages, namespace2) + + val allMessages = ArrayList(ns1MessagesAtNamespace1) + allMessages.addAll(ns2MessagesAtNamespace2) + + val config = getConfig() + val defaultSchema = getDefaultSchema(config) + runSyncAndVerifyStateOutput(config, allMessages, configuredCatalog, false) + retrieveRawRecordsAndAssertSameMessages(catalog, allMessages, defaultSchema) + } + + /** + * The goal of this test is to verify the expected conversions of a namespace as it appears in + * the catalog to how it appears in the destination. Each database has its own rules, so this + * test runs through several "edge" case sorts of names and checks the behavior. + * + * @param testCaseId + * - the id of each test case in namespace_test_cases.json so that we can handle an individual + * case specially for a specific database. 
+     * @param namespaceInCatalog
+     * - namespace as it would appear in the catalog
+     * @param namespaceInDst
+     * - namespace as we would expect it to appear in the destination (this may be overridden for
+     * different databases).
+     * @throws Exception
+     * - broad catch of exception to hydrate log information with additional test case context.
+     */
+    @ParameterizedTest
+    @ArgumentsSource(NamespaceTestCaseProvider::class)
+    @Throws(Exception::class)
+    fun testNamespaces(testCaseId: String?, namespaceInCatalog: String, namespaceInDst: String?) {
+        val nameTransformer = getNameTransformer()
+        nameTransformer.ifPresent { namingConventionTransformer: NamingConventionTransformer ->
+            assertNamespaceNormalization(
+                testCaseId,
+                namespaceInDst,
+                namingConventionTransformer.getNamespace(namespaceInCatalog)
+            )
+        }
+
+        if (!implementsNamespaces() || !supportNamespaceTest()) {
+            return
+        }
+
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.NAMESPACE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        catalog.streams.forEach(
+            Consumer { stream: AirbyteStream -> stream.namespace = namespaceInCatalog }
+        )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+
+        val messages =
+            MoreResources.readResource(
+                    DataArgumentsProvider.NAMESPACE_CONFIG.getMessageFileVersion(
+                        getProtocolVersion()
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
+        val messagesWithNewNamespace =
+            getRecordMessagesWithNewNamespace(messages, namespaceInCatalog)
+
+        val config = getConfig()
+        try {
+            runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false)
+            // Add to the list of schemas to clean up.
+            TEST_SCHEMAS!!.add(namespaceInCatalog)
+        } catch (e: Exception) {
+            throw IOException(
+                String.format(
+                    "[Test Case %s] Destination failed to sync data to namespace %s, see \"namespace_test_cases.json\" for details",
+                    testCaseId,
+                    namespaceInCatalog
+                ),
+                e
+            )
+        }
+    }
+
+    /**
+     * In order to launch a connector on Kubernetes in a pod, we need to be able to wrap the
+     * entrypoint. The connector must specify its entrypoint in the AIRBYTE_ENTRYPOINT environment
+     * variable. This test ensures that the entrypoint environment variable is set.
+     */
+    @Test
+    @Throws(Exception::class)
+    fun testEntrypointEnvVar() {
+        val entrypoint =
+            EntrypointEnvChecker.getEntrypointEnvVariable(
+                processFactory,
+                JOB_ID,
+                JOB_ATTEMPT,
+                jobRoot,
+                imageName
+            )
+
+        Assertions.assertNotNull(entrypoint)
+        Assertions.assertFalse(entrypoint.isBlank())
+    }
+
+    /**
+     * Verify that the destination doesn't fail if new fields arrive in the data after initial
+     * schema discovery and sync.
+     *
+     * @throws Exception
+     */
+    @Test
+    @Throws(Exception::class)
+    fun testSyncNotFailsWithNewFields() {
+        if (!implementsOverwrite()) {
+            LOGGER.info("Destination's spec.json does not support overwrite sync mode.")
+            return
+        }
+
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+
+        val firstSyncMessages =
+            MoreResources.readResource(
+                    DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        getProtocolVersion()
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
+        val config = getConfig()
+        runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false)
+        val stream = catalog.streams[0]
+
+        // Run second sync with new fields on the message
+        val secondSyncMessagesWithNewFields:
+            MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            Lists.newArrayList(
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(stream.name)
+                            .withEmittedAt(Instant.now().toEpochMilli())
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder()
+                                        .put("id", 1)
+                                        .put("currency", "USD")
+                                        .put("date", "2020-03-31T00:00:00Z")
+                                        .put("newFieldString", "Value for new field")
+                                        .put("newFieldNumber", 3)
+                                        .put("HKD", 10.1)
+                                        .put("NZD", 700.1)
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))
+                    )
+            )
+
+        // Run sync and verify that all messages were written without failing
+        runSyncAndVerifyStateOutput(
+            config,
+            secondSyncMessagesWithNewFields,
+            configuredCatalog,
+            false
+        )
+        val destinationOutput =
+            retrieveRecords(testEnv, stream.name, getDefaultSchema(config), stream.jsonSchema)
+        // Remove state message
+        secondSyncMessagesWithNewFields.removeIf {
+            airbyteMessage: io.airbyte.protocol.models.v0.AirbyteMessage ->
+            airbyteMessage.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE
+        }
+        Assertions.assertEquals(secondSyncMessagesWithNewFields.size, destinationOutput.size)
+    }
+
+    /** Whether the destination should be tested against different namespaces. */
+    protected fun supportNamespaceTest(): Boolean {
+        return false
+    }
+
+    /**
+     * Set up the name transformer used by a destination to test it against a variety of
+     * namespaces.
+     */
+    protected open fun getNameTransformer(): Optional<NamingConventionTransformer> =
+        Optional.empty()
+
+    /**
+     * Override this method if the normalized namespace is different from the default one. E.g.
+     * BigQuery does allow a name starting with a number. So it should change the expected
+     * normalized namespace when testCaseId = "S3A-1". Find the testCaseId in
+     * "namespace_test_cases.json".
+     */
+    protected fun assertNamespaceNormalization(
+        testCaseId: String?,
+        expectedNormalizedNamespace: String?,
+        actualNormalizedNamespace: String?
+ ) { + Assertions.assertEquals( + expectedNormalizedNamespace, + actualNormalizedNamespace, + String.format( + "Test case %s failed; if this is expected, please override assertNamespaceNormalization", + testCaseId + ) + ) + } + + @Throws(TestHarnessException::class) + private fun runSpec(): ConnectorSpecification { + return convertProtocolObject( + DefaultGetSpecTestHarness( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ) + ) + .run(JobGetSpecConfig().withDockerImage(imageName), jobRoot) + .spec, + ConnectorSpecification::class.java + ) + } + + @Throws(TestHarnessException::class) + protected fun runCheck(config: JsonNode?): StandardCheckConnectionOutput { + return DefaultCheckConnectionTestHarness( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ), + mConnectorConfigUpdater + ) + .run(StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) + .checkConnection + } + + protected fun runCheckWithCatchedException( + config: JsonNode? + ): StandardCheckConnectionOutput.Status { + try { + val standardCheckConnectionOutput = + DefaultCheckConnectionTestHarness( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ), + mConnectorConfigUpdater + ) + .run( + StandardCheckConnectionInput().withConnectionConfiguration(config), + jobRoot + ) + .checkConnection + return standardCheckConnectionOutput.status + } catch (e: Exception) { + LOGGER.error("Failed to check connection:" + e.message) + } + return StandardCheckConnectionOutput.Status.FAILED + } + + protected val destination: AirbyteDestination + get() = + DefaultAirbyteDestination( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ) + ) + + @Throws(Exception::class) + protected fun runSyncAndVerifyStateOutput( + config: JsonNode, + messages: List, + catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + runNormalization: Boolean + ) { + val destinationOutput = runSync(config, messages, catalog, runNormalization) + + val expectedStateMessage = + reversed(messages) + .stream() + .filter { m: io.airbyte.protocol.models.v0.AirbyteMessage -> + m.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE + } + .findFirst() + .orElseThrow { + IllegalArgumentException( + "All message sets used for testing should include a state record" + ) + }!! + + Collections.reverse(destinationOutput) + val actualStateMessage = + destinationOutput + .stream() + .filter { m: io.airbyte.protocol.models.v0.AirbyteMessage? -> + m!!.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE + } + .findFirst() + .map { msg: io.airbyte.protocol.models.v0.AirbyteMessage? -> + // Modify state message to remove destination stats. 
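+                        // (Destination stats are attached at runtime and are not part of the
+                        // expected fixture, so they must be cleared before the equality check.)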
+ val clone = msg!!.state + clone.destinationStats = null + msg.state = clone + msg + } + .orElseGet { + Assertions.fail("Destination failed to output state") + null + } + + Assertions.assertEquals(expectedStateMessage, actualStateMessage) + } + + @Throws(Exception::class) + private fun runSync( + config: JsonNode, + messages: List, + catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + runNormalization: Boolean + ): List { + val destinationConfig = + WorkerDestinationConfig() + .withConnectionId(UUID.randomUUID()) + .withCatalog( + convertProtocolObject( + catalog, + io.airbyte.protocol.models.ConfiguredAirbyteCatalog::class.java + ) + ) + .withDestinationConnectionConfiguration(config) + + val destination = destination + + destination.start( + destinationConfig, + jobRoot, + inDestinationNormalizationFlags(runNormalization) + ) + messages.forEach( + Consumer { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + Exceptions.toRuntime { + destination.accept( + convertProtocolObject( + message, + io.airbyte.protocol.models.AirbyteMessage::class.java + ) + ) + } + } + ) + destination.notifyEndOfInput() + + val destinationOutput: MutableList = + ArrayList() + while (!destination.isFinished) { + destination.attemptRead().ifPresent { m: io.airbyte.protocol.models.AirbyteMessage -> + destinationOutput.add(convertProtocolObject(m, AirbyteMessage::class.java)) + } + } + + destination.close() + + if (!runNormalization || (supportsInDestinationNormalization())) { + return destinationOutput + } + + val runner: NormalizationRunner = + DefaultNormalizationRunner( + processFactory, + normalizationImageName, + normalizationIntegrationType + ) + runner.start() + val normalizationRoot = Files.createDirectories(jobRoot!!.resolve("normalize")) + if ( + !runner.normalize( + JOB_ID, + JOB_ATTEMPT, + normalizationRoot, + destinationConfig.destinationConnectionConfiguration, + destinationConfig.catalog, + null + ) + ) { + throw TestHarnessException("Normalization Failed.") + } + runner.close() + return destinationOutput + } + + @Throws(Exception::class) + protected fun retrieveRawRecordsAndAssertSameMessages( + catalog: AirbyteCatalog, + messages: List, + defaultSchema: String? + ) { + val actualMessages: MutableList = ArrayList() + for (stream in catalog.streams) { + val streamName = stream.name + val schema = if (stream.namespace != null) stream.namespace else defaultSchema!! + val msgList = + retrieveRecords(testEnv, streamName, schema, stream.jsonSchema) + .stream() + .map { data: JsonNode? -> + AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(schema) + .withData(data) + } + .toList() + actualMessages.addAll(msgList) + } + + assertSameMessages(messages, actualMessages, false) + } + + // ignores emitted at. 
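+    // When pruneAirbyteInternalFields is true, Airbyte bookkeeping fields (ab_id, hash ids,
+    // *_at columns, etc.) are also stripped via safePrune before comparing.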
+    protected fun assertSameMessages(
+        expected: List<io.airbyte.protocol.models.v0.AirbyteMessage>,
+        actual: List<AirbyteRecordMessage>,
+        pruneAirbyteInternalFields: Boolean
+    ) {
+        val expectedProcessed =
+            expected
+                .stream()
+                .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
+                    message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD
+                }
+                .map { obj: io.airbyte.protocol.models.v0.AirbyteMessage -> obj.record }
+                .peek { recordMessage: AirbyteRecordMessage -> recordMessage.emittedAt = null }
+                .map { recordMessage: AirbyteRecordMessage ->
+                    if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage
+                }
+                .map { obj: AirbyteRecordMessage -> obj.data }
+                .collect(Collectors.toList())
+
+        val actualProcessed =
+            actual
+                .stream()
+                .map { recordMessage: AirbyteRecordMessage ->
+                    if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage
+                }
+                .map { obj: AirbyteRecordMessage -> obj.data }
+                .collect(Collectors.toList())
+
+        _testDataComparator.assertSameData(expectedProcessed, actualProcessed)
+    }
+
+    @Throws(Exception::class)
+    protected fun retrieveNormalizedRecords(
+        catalog: AirbyteCatalog,
+        defaultSchema: String?
+    ): List<AirbyteRecordMessage> {
+        val actualMessages: MutableList<AirbyteRecordMessage> = ArrayList()
+
+        for (stream in catalog.streams) {
+            val streamName = stream.name
+
+            val msgList =
+                retrieveNormalizedRecords(testEnv, streamName, defaultSchema)
+                    .stream()
+                    .map { data: JsonNode? ->
+                        AirbyteRecordMessage().withStream(streamName).withData(data)
+                    }
+                    .toList()
+            actualMessages.addAll(msgList)
+        }
+        return actualMessages
+    }
+
+    class TestDestinationEnv(val localRoot: Path?) {
+        override fun toString(): String {
+            return "TestDestinationEnv{" + "localRoot=" + localRoot + '}'
+        }
+    }
+
+    /**
+     * This test MUST be disabled by default, but you may uncomment it when you need to reproduce a
+     * performance issue for a destination. It lets you emulate many streams, and many messages in
+     * each, simply by changing the "streamsSize" arg to set the number of tables/streams and the
+     * "messagesNumber" arg to the number of messages that will be written to each stream. !!! Do
+     * NOT forget to manually remove all generated objects !!! Hint: to check the destination
+     * container's output, run "docker ps" in a console to find the container's id, then run
+     * "docker container attach your_containers_id" (e.g. docker container attach 18cc929f44c8) to
+     * see the container's output.
+     */
+    @Test
+    @Disabled
+    @Throws(Exception::class)
+    fun testStressPerformance() {
+        val streamsSize = 5 // number of generated streams
+        val messagesNumber = 300 // number of messages to be written to each generated stream
+
+        // Each stream will have an id and name fields
+        val USERS_STREAM_NAME = "users" // stream name prefix; streams are named "users0", "users1", etc.
+        val ID = "id"
+        val NAME = "name"
+
+        // generate schemas/catalogs
+        val configuredAirbyteStreams: MutableList<AirbyteStream> = ArrayList()
+        for (i in 0 until streamsSize) {
+            configuredAirbyteStreams.add(
+                CatalogHelpers.createAirbyteStream(
+                    USERS_STREAM_NAME + i,
+                    Field.of(NAME, JsonSchemaType.STRING),
+                    Field.of(ID, JsonSchemaType.STRING)
+                )
+            )
+        }
+        val testCatalog = AirbyteCatalog().withStreams(configuredAirbyteStreams)
+        val configuredTestCatalog = CatalogHelpers.toDefaultConfiguredCatalog(testCatalog)
+
+        val config = getConfig()
+        val destinationConfig =
+            WorkerDestinationConfig()
+                .withConnectionId(UUID.randomUUID())
+                .withCatalog(
+                    convertProtocolObject(
+                        configuredTestCatalog,
+                        io.airbyte.protocol.models.ConfiguredAirbyteCatalog::class.java
+                    )
+                )
+                .withDestinationConnectionConfiguration(config)
+        val destination = destination
+
+        // Start destination
+        destination.start(destinationConfig, jobRoot, emptyMap())
+
+        val currentStreamNumber = AtomicInteger(0)
+        val currentRecordNumberForStream = AtomicInteger(0)
+
+        // this is just a current-state logger, useful for seeing progress when running long
+        // tests
+        val countPrinter = Thread {
+            while (true) {
+                println(
+                    "currentStreamNumber=" +
+                        currentStreamNumber +
+                        ", currentRecordNumberForStream=" +
+                        currentRecordNumberForStream +
+                        ", " +
+                        Instant.now()
+                )
+                try {
+                    Thread.sleep(10000)
+                } catch (e: InterruptedException) {
+                    e.printStackTrace()
+                }
+            }
+        }
+        countPrinter.start()
+
+        // iterate through streams
+        for (streamCounter in 0 until streamsSize) {
+            LOGGER.info("Started new stream processing with #$streamCounter")
+            // iterate through messages inside a particular stream
+            // Generate messages and write them to the stream
+            for (msgCounter in 0 until messagesNumber) {
+                val msg =
+                    io.airbyte.protocol.models.v0
+                        .AirbyteMessage()
+                        .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                        .withRecord(
+                            AirbyteRecordMessage()
+                                .withStream(USERS_STREAM_NAME + streamCounter)
+                                .withData(
+                                    Jsons.jsonNode(
+                                        ImmutableMap.builder()
+                                            .put(NAME, LOREM_IPSUM)
+                                            .put(ID, streamCounter.toString() + "_" + msgCounter)
+                                            .build()
+                                    )
+                                )
+                                .withEmittedAt(Instant.now().toEpochMilli())
+                        )
+                try {
+                    destination.accept(
+                        convertProtocolObject(
+                            msg,
+                            io.airbyte.protocol.models.AirbyteMessage::class.java
+                        )
+                    )
+                } catch (e: Exception) {
+                    LOGGER.error("Failed to write a RECORD message: $e")
+                    throw RuntimeException(e)
+                }
+
+                currentRecordNumberForStream.set(msgCounter)
+            }
+
+            // send state message here, it's required
+            val msgState =
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder()
+                                        .put("start_date", "2020-09-02")
+                                        .build()
+                                )
+                            )
+                    )
+            try {
+                destination.accept(
+                    convertProtocolObject(
+                        msgState,
+                        io.airbyte.protocol.models.AirbyteMessage::class.java
+                    )
+                )
+            } catch (e: Exception) {
+                LOGGER.error("Failed to write a STATE message: $e")
+                throw RuntimeException(e)
+            }
+
+            currentStreamNumber.set(streamCounter)
+        }
+
+        LOGGER.info(
+            String.format(
+                "Added %s messages to each of %s streams",
+                currentRecordNumberForStream,
+                currentStreamNumber
+            )
+        )
+        // Close destination
+        destination.notifyEndOfInput()
+    }
+
+    protected open fun supportBasicDataTypeTest(): Boolean {
+        return false
+    }
+
+    protected open fun supportArrayDataTypeTest(): Boolean {
+        return false
+    }
+
+    protected open fun supportObjectDataTypeTest(): Boolean {
+        return false
+    }
+
+    protected fun supportIncrementalSchemaChanges(): Boolean {
+        return false
+    }
+
+    /**
+     * This method should be overridden if the destination connector supports a newer protocol
+     * version; otherwise [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.V0]
+     * is used.
+     *
+     * NOTE: the method should be public for the sake of Java reflection.
+     *
+     * @return the protocol version supported by the connector
+     */
+    open fun getProtocolVersion(): ProtocolVersion = ProtocolVersion.V0
+
+    private fun checkTestCompatibility(
+        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility
+    ): Boolean {
+        return testCompatibility.isTestCompatible(
+            supportBasicDataTypeTest(),
+            supportArrayDataTypeTest(),
+            supportObjectDataTypeTest()
+        )
+    }
+
+    @ParameterizedTest
+    @ArgumentsSource(DataTypeTestArgumentProvider::class)
+    @Throws(Exception::class)
+    fun testDataTypeTestWithNormalization(
+        messagesFilename: String?,
+        catalogFilename: String?,
+        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility
+    ) {
+        if (!checkTestCompatibility(testCompatibility)) {
+            return
+        }
+
+        val catalog = readCatalogFromFile(catalogFilename)
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages = readMessagesFromFile(messagesFilename)
+
+        runAndCheck(catalog, configuredCatalog, messages)
+    }
+
+    @Test
+    @Throws(Exception::class)
+    fun testSyncNumberNanDataType() {
+        // NaN/Infinity support requires protocol version V1 or higher
+        val numericTypesSupport = specialNumericTypesSupportTest
+        if (getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportNumberNan) {
+            return
+        }
+        val catalog =
+            readCatalogFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG,
+                    getProtocolVersion()
+                )
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages =
+            readMessagesFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE,
+                    getProtocolVersion()
+                )
+            )
+        val config = getConfig()
+        val defaultSchema = getDefaultSchema(config)
+
+        runAndCheck(catalog, configuredCatalog, messages)
+    }
+
+    @Test
+    @Throws(Exception::class)
+    fun testSyncIntegerNanDataType() {
+        // NaN/Infinity support requires protocol version V1 or higher
+        val numericTypesSupport = specialNumericTypesSupportTest
+        if (getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportIntegerNan) {
+            return
+        }
+        val catalog =
+            readCatalogFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG,
+                    getProtocolVersion()
+                )
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages =
+            readMessagesFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE,
+                    getProtocolVersion()
+                )
+            )
+        val config = getConfig()
+        val defaultSchema = getDefaultSchema(config)
+
+        runAndCheck(catalog, configuredCatalog, messages)
+    }
+
+    @Test
+    @Throws(Exception::class)
+    fun testSyncNumberInfinityDataType() {
+        // NaN/Infinity support requires protocol version V1 or higher
+        val numericTypesSupport = specialNumericTypesSupportTest
+        if (
+            getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportNumberInfinity
+        ) {
+            return
+        }
+        val catalog =
+            readCatalogFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG,
+                    getProtocolVersion()
+                )
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages =
+            readMessagesFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE,
+                    getProtocolVersion()
+                )
+            )
+        val config = getConfig()
+        val defaultSchema = getDefaultSchema(config)
+
+        runAndCheck(catalog, configuredCatalog, messages)
+    }
+
+    @Test
+    @Throws(Exception::class)
+    fun testSyncIntegerInfinityDataType() {
+        // NaN/Infinity support requires protocol version V1 or higher
+        val numericTypesSupport = specialNumericTypesSupportTest
+        if (
+            getProtocolVersion() == ProtocolVersion.V0 ||
+                !numericTypesSupport.supportIntegerInfinity
+        ) {
+            return
+        }
+        val catalog =
+            readCatalogFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG,
+                    getProtocolVersion()
+                )
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages =
+            readMessagesFromFile(
+                ArgumentProviderUtil.prefixFileNameByVersion(
+                    DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE,
+                    getProtocolVersion()
+                )
+            )
+        val config = getConfig()
+        val defaultSchema = getDefaultSchema(config)
+
+        runAndCheck(catalog, configuredCatalog, messages)
+    }
+
+    @Throws(Exception::class)
+    private fun runAndCheck(
+        catalog: AirbyteCatalog,
+        configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog,
+        messages: List<io.airbyte.protocol.models.v0.AirbyteMessage>
+    ) {
+        if (normalizationFromDefinition()) {
+            LOGGER.info("Normalization is supported! Run test with normalization.")
+            runAndCheckWithNormalization(messages, configuredCatalog, catalog)
+        } else {
+            LOGGER.info("Normalization is not supported! Run test without normalization.")
+            runAndCheckWithoutNormalization(messages, configuredCatalog, catalog)
+        }
+    }
+
+    @Throws(Exception::class)
+    private fun runAndCheckWithNormalization(
+        messages: List<io.airbyte.protocol.models.v0.AirbyteMessage>,
+        configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog,
+        catalog: AirbyteCatalog
+    ) {
+        val config = getConfig()
+        runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true)
+
+        val actualMessages = retrieveNormalizedRecords(catalog, getDefaultSchema(config))
+        assertSameMessages(messages, actualMessages, true)
+    }
+
+    @Throws(Exception::class)
+    private fun runAndCheckWithoutNormalization(
+        messages: List<io.airbyte.protocol.models.v0.AirbyteMessage>,
+        configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog,
+        catalog: AirbyteCatalog
+    ) {
+        val config = getConfig()
+        runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false)
+        retrieveRawRecordsAndAssertSameMessages(catalog, messages, getDefaultSchema(config))
+    }
+
+    /**
+     * Can be used in the overridden [.getSpecialNumericTypesSupportTest] method to specify whether
+     * the connector supports Integer/Number NaN or Integer/Number Infinity types
+     */
+    class SpecialNumericTypes(
+        val supportIntegerNan: Boolean = false,
+        val supportNumberNan: Boolean = false,
+        val supportIntegerInfinity: Boolean = false,
+        val supportNumberInfinity: Boolean = false
+    )
+
+    class NamespaceTestCaseProvider : ArgumentsProvider {
+        @Throws(Exception::class)
+        override fun provideArguments(context: ExtensionContext): Stream<out Arguments> {
+            val testCases = Jsons.deserialize(MoreResources.readResource(NAMESPACE_TEST_CASES_JSON))
+            return MoreIterators.toList(testCases.elements())
+                .stream()
+                .filter { testCase: JsonNode -> testCase["enabled"].asBoolean() }
+                .map { testCase: JsonNode ->
+                    val namespaceInCatalog =
TestingNamespaces.generate(testCase["namespace"].asText()) + val namespaceInDst = + TestingNamespaces.generateFromOriginal( + namespaceInCatalog, + testCase["namespace"].asText(), + testCase["normalized"].asText() + ) + Arguments.of( + testCase["id"] + .asText(), // Add uniqueness to namespace to avoid collisions between + // tests. + namespaceInCatalog, + namespaceInDst + ) + } + } + + companion object { + const val NAMESPACE_TEST_CASES_JSON: String = "namespace_test_cases.json" + } + } + + private fun supportsNormalization(): Boolean { + return supportsInDestinationNormalization() || normalizationFromDefinition() + } + + companion object { + private val RANDOM = Random() + private const val NORMALIZATION_VERSION = "dev" + + private const val JOB_ID = "0" + private const val JOB_ATTEMPT = 0 + + private const val DUMMY_CATALOG_NAME = "DummyCatalog" + + private val LOGGER: Logger = LoggerFactory.getLogger(DestinationAcceptanceTest::class.java) + + /** + * Reverses a list by creating a new list with the same elements of the input list and then + * reversing it. The input list will not be altered. + * + * @param list to reverse + * @param type + * @return new list with elements of original reversed. + */ + fun reversed(list: List): List { + val reversed = ArrayList(list) + Collections.reverse(reversed) + return reversed + } + + /** + * Same as [.pruneMutate], except does a defensive copy and returns a new json node object + * instead of mutating in place. + * + * @param record + * - record that will be pruned. + * @return pruned json node. + */ + private fun safePrune(record: AirbyteRecordMessage): AirbyteRecordMessage { + val clone = Jsons.clone(record) + pruneMutate(clone.data) + return clone + } + + /** + * Prune fields that are added internally by airbyte and are not part of the original data. + * Used so that we can compare data that is persisted by an Airbyte worker to the original + * data. This method mutates the provided json in place. + * + * @param json + * - json that will be pruned. will be mutated in place! + */ + private fun pruneMutate(json: JsonNode) { + for (key in Jsons.keys(json)) { + val node = json[key] + // recursively prune all airbyte internal fields. + if (node.isObject || node.isArray) { + pruneMutate(node) + } + + // prune the following + // - airbyte internal fields + // - fields that match what airbyte generates as hash ids + // - null values -- normalization will often return `: null` but in the + // original data that key + // likely did not exist in the original message. the most consistent thing to do is + // always remove + // the null fields (this choice does decrease our ability to check that + // normalization creates + // columns even if all the values in that column are null) + val airbyteInternalFields = + Sets.newHashSet( + "emitted_at", + "ab_id", + "normalized_at", + "EMITTED_AT", + "AB_ID", + "NORMALIZED_AT", + "HASHID", + "unique_key", + "UNIQUE_KEY" + ) + if ( + airbyteInternalFields.stream().anyMatch { internalField: String -> + key.lowercase(Locale.getDefault()) + .contains(internalField.lowercase(Locale.getDefault())) + } || json[key].isNull + ) { + (json as ObjectNode).remove(key) + } + } + } + + private const val LOREM_IPSUM = + ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque malesuada lacinia aliquet. Nam feugiat mauris vel magna dignissim feugiat. Nam non dapibus sapien, ac mattis purus. Donec mollis libero erat, a rutrum ipsum pretium id. 
Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Integer nec aliquam leo. Aliquam eu dictum augue, a ornare elit.\n" + + "\n" + + "Nulla viverra blandit neque. Nam blandit varius efficitur. Nunc at sapien blandit, malesuada lectus vel, tincidunt orci. Proin blandit metus eget libero facilisis interdum. Aenean luctus scelerisque orci, at scelerisque sem vestibulum in. Nullam ornare massa sed dui efficitur, eget volutpat lectus elementum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Integer elementum mi vitae erat eleifend iaculis. Nullam eget tincidunt est, eget tempor est. Sed risus velit, iaculis vitae est in, volutpat consectetur odio. Aenean ut fringilla elit. Suspendisse non aliquet massa. Curabitur suscipit metus nunc, nec porttitor velit venenatis vel. Fusce vestibulum eleifend diam, lobortis auctor magna.\n" + + "\n" + + "Etiam maximus, mi feugiat pharetra mattis, nulla neque euismod metus, in congue nunc sem nec ligula. Curabitur aliquam, risus id convallis cursus, nunc orci sollicitudin enim, quis scelerisque nibh dui in ipsum. Suspendisse mollis, metus a dapibus scelerisque, sapien nulla pretium ipsum, non finibus sem orci et lectus. Aliquam dictum magna nisi, a consectetur urna euismod nec. In pulvinar facilisis nulla, id mollis libero pulvinar vel. Nam a commodo leo, eu commodo dolor. In hac habitasse platea dictumst. Curabitur auctor purus quis tortor laoreet efficitur. Quisque tincidunt, risus vel rutrum fermentum, libero urna dignissim augue, eget pulvinar nibh ligula ut tortor. Vivamus convallis non risus sed consectetur. Etiam accumsan enim ac nisl suscipit, vel congue lorem volutpat. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce non orci quis lacus rhoncus vestibulum nec ut magna. In varius lectus nec quam posuere finibus. Vivamus quis lectus vitae tortor sollicitudin fermentum.\n" + + "\n" + + "Pellentesque elementum vehicula egestas. Sed volutpat velit arcu, at imperdiet sapien consectetur facilisis. Suspendisse porttitor tincidunt interdum. Morbi gravida faucibus tortor, ut rutrum magna tincidunt a. Morbi eu nisi eget dui finibus hendrerit sit amet in augue. Aenean imperdiet lacus enim, a volutpat nulla placerat at. Suspendisse nibh ipsum, venenatis vel maximus ut, fringilla nec felis. Sed risus mi, egestas quis quam ullamcorper, pharetra vestibulum diam.\n" + + "\n" + + "Praesent finibus scelerisque elit, accumsan condimentum risus mattis vitae. Donec tristique hendrerit facilisis. Curabitur metus purus, venenatis non elementum id, finibus eu augue. Quisque posuere rhoncus ligula, et vehicula erat pulvinar at. Pellentesque vel quam vel lectus tincidunt congue quis id sapien. Ut efficitur mauris vitae pretium iaculis. Aliquam consectetur iaculis nisi vitae laoreet. Integer vel odio quis diam mattis tempor eget nec est. Donec iaculis facilisis neque, at dictum magna vestibulum ut. Sed malesuada non nunc ac consequat. Maecenas tempus lectus a nisl congue, ac venenatis diam viverra. Nam ac justo id nulla iaculis lobortis in eu ligula. Vivamus et ligula id sapien efficitur aliquet. Curabitur est justo, tempus vitae mollis quis, tincidunt vitae felis. Vestibulum molestie laoreet justo, nec mollis purus vulputate at.") + + protected val specialNumericTypesSupportTest: SpecialNumericTypes + /** + * NaN and Infinity test are not supported by default. Please override this method to + * specify NaN/Infinity types support example: + * + *
+             * protected fun getSpecialNumericTypesSupportTest(): SpecialNumericTypes =
+             *     SpecialNumericTypes(supportNumberNan = true, supportIntegerNan = true)
* + * + * @return SpecialNumericTypes with support flags + */ + get() = SpecialNumericTypes() + + @Throws(IOException::class) + private fun readCatalogFromFile(catalogFilename: String?): AirbyteCatalog { + return Jsons.deserialize( + MoreResources.readResource(catalogFilename), + AirbyteCatalog::class.java + ) + } + + @Throws(IOException::class) + private fun readMessagesFromFile( + messagesFilename: String? + ): List { + return MoreResources.readResource(messagesFilename).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } + } + + /** Mutate the input airbyte record message namespace. */ + private fun getRecordMessagesWithNewNamespace( + airbyteMessages: List, + namespace: String? + ): List { + airbyteMessages.forEach( + Consumer { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + if (message.record != null) { + message.record.namespace = namespace + } + } + ) + return airbyteMessages + } + + private fun convertProtocolObject(v1: V1, klass: Class): V0 { + return Jsons.`object`(Jsons.jsonNode(v1), klass) + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt new file mode 100644 index 000000000000..bcbe334e834e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.commons.json.Jsons + +object DestinationAcceptanceTestUtils { + fun putStringIntoJson(stringValue: String?, fieldName: String?, node: ObjectNode) { + if ( + stringValue != null && + (stringValue.startsWith("[") && stringValue.endsWith("]") || + stringValue.startsWith("{") && stringValue.endsWith("}")) + ) { + node.set(fieldName, Jsons.deserialize(stringValue)) + } else { + node.put(fieldName, stringValue) + } + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt new file mode 100644 index 000000000000..a77b4ae71399 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import java.util.* +import java.util.function.Function +import org.jooq.Field +import org.jooq.Record + +abstract class JdbcDestinationAcceptanceTest : DestinationAcceptanceTest() { + protected val mapper: ObjectMapper = ObjectMapper() + + protected fun getJsonFromRecord(record: Record): JsonNode { + return getJsonFromRecord(record, Function { x: Any? 
-> Optional.empty() }) + } + + protected fun getJsonFromRecord( + record: Record, + valueParser: Function> + ): JsonNode { + val node = mapper.createObjectNode() + + Arrays.stream(record.fields()).forEach { field: Field<*> -> + val value = record[field] + val parsedValue = valueParser.apply(value) + if (parsedValue.isPresent) { + node.put(field.name, parsedValue.get()) + } else { + when (field.dataType.typeName) { + "varchar", + "nvarchar", + "jsonb", + "json", + "other" -> { + val stringValue = (value?.toString()) + DestinationAcceptanceTestUtils.putStringIntoJson( + stringValue, + field.name, + node + ) + } + else -> node.put(field.name, (value?.toString())) + } + } + } + return node + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt new file mode 100644 index 000000000000..9ad2d55c33cb --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination + +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer +import io.airbyte.cdk.integrations.base.Destination +import io.airbyte.commons.json.Jsons +import io.airbyte.configoss.WorkerDestinationConfig +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.workers.internal.AirbyteDestination +import java.nio.file.Path +import java.util.* + +/** + * Simple class to host a Destination in-memory rather than spinning up a container for it. For + * debugging and testing purposes only; not recommended to use this for real code + */ +class LocalAirbyteDestination(private val dest: Destination) : AirbyteDestination { + private var consumer: AirbyteMessageConsumer? 
= null
+    private var isClosed = false
+
+    @Throws(Exception::class)
+    override fun start(
+        destinationConfig: WorkerDestinationConfig,
+        jobRoot: Path,
+        additionalEnvironmentVariables: Map<String, String>
+    ) {
+        consumer =
+            dest.getConsumer(
+                destinationConfig.destinationConnectionConfiguration,
+                Jsons.`object`(
+                    Jsons.jsonNode(destinationConfig.catalog),
+                    ConfiguredAirbyteCatalog::class.java
+                )
+            ) { message: AirbyteMessage ->
+                Destination.Companion.defaultOutputRecordCollector(message)
+            }
+        consumer!!.start()
+    }
+
+    @Throws(Exception::class)
+    override fun accept(message: io.airbyte.protocol.models.AirbyteMessage) {
+        consumer!!.accept(Jsons.`object`(Jsons.jsonNode(message), AirbyteMessage::class.java))
+    }
+
+    override fun notifyEndOfInput() {
+        // nothing to do here
+    }
+
+    @Throws(Exception::class)
+    override fun close() {
+        consumer!!.close()
+        isClosed = true
+    }
+
+    override fun cancel() {
+        // nothing to do here
+    }
+
+    override fun isFinished(): Boolean {
+        return isClosed
+    }
+
+    override fun getExitValue(): Int {
+        return 0
+    }
+
+    override fun attemptRead(): Optional<io.airbyte.protocol.models.AirbyteMessage> {
+        return Optional.empty()
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt
new file mode 100644
index 000000000000..8eec831460ba
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination
+
+import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer
+import io.airbyte.commons.json.Jsons
+import io.airbyte.protocol.models.v0.AirbyteMessage
+import io.airbyte.protocol.models.v0.AirbyteStateMessage
+import io.airbyte.protocol.models.v0.AirbyteStreamState
+import io.airbyte.protocol.models.v0.StreamDescriptor
+import java.util.function.Consumer
+import org.junit.jupiter.api.Test
+import org.mockito.Mockito
+
+abstract class PerStreamStateMessageTest {
+    protected abstract val mockedConsumer: Consumer<AirbyteMessage>
+        get
+
+    protected abstract val messageConsumer: FailureTrackingAirbyteMessageConsumer
+        get
+
+    @Test
+    @Throws(Exception::class)
+    fun ensureAllStateMessageAreEmitted() {
+        val airbyteMessage1 =
+            AirbyteMessageCreator.createStreamStateMessage("name_one", "state_one")
+        val airbyteMessage2 =
+            AirbyteMessageCreator.createStreamStateMessage("name_two", "state_two")
+        val airbyteMessage3 =
+            AirbyteMessageCreator.createStreamStateMessage("name_three", "state_three")
+        val messageConsumer = messageConsumer
+
+        messageConsumer.accept(airbyteMessage1)
+        messageConsumer.accept(airbyteMessage2)
+        messageConsumer.accept(airbyteMessage3)
+
+        val mConsumer = mockedConsumer
+        val inOrder = Mockito.inOrder(mConsumer)
+
+        inOrder.verify(mConsumer).accept(airbyteMessage1)
+        inOrder.verify(mConsumer).accept(airbyteMessage2)
+        inOrder.verify(mConsumer).accept(airbyteMessage3)
+    }
+
+    internal object AirbyteMessageCreator {
+        fun createStreamStateMessage(name: String?, value: String): AirbyteMessage {
+            return AirbyteMessage()
+                .withType(AirbyteMessage.Type.STATE)
+                .withState(
+                    AirbyteStateMessage()
+                        .withType(AirbyteStateMessage.AirbyteStateType.STREAM)
+                        .withStream(
+                            AirbyteStreamState()
.withStreamDescriptor(StreamDescriptor().withName(name))
+                                .withStreamState(Jsons.jsonNode(value))
+                        )
+                )
+        }
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.kt
new file mode 100644
index 000000000000..652aaf2d77a0
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/ProtocolVersion.kt
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination
+
+enum class ProtocolVersion(val prefix: String) {
+    V0("v0"),
+    V1("v1")
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt
new file mode 100644
index 000000000000..405a5702deb4
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination
+
+import java.time.Instant
+import java.time.LocalDate
+import java.time.ZoneId
+import java.time.ZoneOffset
+import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
+import java.time.temporal.ChronoUnit
+import java.util.*
+import org.apache.commons.lang3.RandomStringUtils
+
+/**
+ * This class is used to generate unique namespaces for tests that follow a convention so that we
+ * can identify and delete old namespaces. Ideally tests would always clean up their own
+ * namespaces, but there are exceptional cases that can prevent that from happening. We want to be
+ * able to identify namespaces for which this has happened from their name, so we can take action.
+ *
+ * The convention we follow is `<test-provided prefix>_test_YYYYMMDD_<5-character random suffix>`.
+ */
+object TestingNamespaces {
+    private val FORMATTER: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")
+    private const val SUFFIX_LENGTH = 5
+    const val STANDARD_PREFIX: String = "test_"
+
+    /**
+     * Generates a namespace that matches our testing namespace convention.
+     *
+     * @param prefix optional prefix to use for the namespace
+     * @return convention-compliant namespace
+     */
+    @JvmOverloads
+    fun generate(prefix: String? = null): String {
+        val userDefinedPrefix = if (prefix != null) prefix + "_" else ""
+        return userDefinedPrefix +
+            STANDARD_PREFIX +
+            FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC"))) +
+            "_" +
+            generateSuffix()
+    }
+
+    fun generateFromOriginal(toOverwrite: String?, oldPrefix: String?, newPrefix: String?): String {
+        return toOverwrite!!.replace(oldPrefix!!, newPrefix!!)
+    }
+
+    /**
+     * Checks if a namespace is older than 2 days.
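+     * (e.g. a hypothetical "ns_test_20240101_abcde" would count as older than 2 days once the
+     * current date is past 2024-01-03)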
+ * + * @param namespace to check + * @return true if the namespace is older than 2 days, otherwise false + */ + fun isOlderThan2Days(namespace: String): Boolean { + return isOlderThan(namespace, 2, ChronoUnit.DAYS) + } + + private fun isOlderThan(namespace: String, timeMagnitude: Int, timeUnit: ChronoUnit): Boolean { + return ifTestNamespaceGetDate(namespace) + .map { namespaceInstant: Instant -> + namespaceInstant.isBefore(Instant.now().minus(timeMagnitude.toLong(), timeUnit)) + } + .orElse(false) + } + + private fun ifTestNamespaceGetDate(namespace: String): Optional { + val parts = namespace.split("_".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray() + + if (parts.size < 3) { + return Optional.empty() + } + + // need to re-add the _ since it gets pruned out by the split. + if (STANDARD_PREFIX != parts[parts.size - 3] + "_") { + return Optional.empty() + } + + return parseDateOrEmpty(parts[parts.size - 2]) + } + + private fun parseDateOrEmpty(dateCandidate: String): Optional { + return try { + Optional.ofNullable( + LocalDate.parse(dateCandidate, FORMATTER).atStartOfDay().toInstant(ZoneOffset.UTC) + ) + } catch (e: DateTimeParseException) { + Optional.empty() + } + } + + private fun generateSuffix(): String { + return RandomStringUtils.randomAlphabetic(SUFFIX_LENGTH).lowercase(Locale.getDefault()) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt new file mode 100644 index 000000000000..80214381a591 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination.argproviders + +import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion +import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import java.util.stream.Stream +import org.junit.jupiter.api.extension.ExtensionContext +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.ArgumentsProvider + +/** + * Class encapsulating all arguments required for Standard Destination Tests. + * + * All files defined here can be found in src/main/resources of this package. + */ +class DataArgumentsProvider : ArgumentsProvider { + @Throws(Exception::class) + override fun provideArguments(context: ExtensionContext): Stream { + val protocolVersion = ArgumentProviderUtil.getProtocolVersion(context) + return Stream.of( + Arguments.of( + EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion), + EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion) + ), + Arguments.of( + EDGE_CASE_CONFIG.getMessageFileVersion(protocolVersion), + EDGE_CASE_CONFIG.getCatalogFileVersion(protocolVersion) + ) // todo - need to use the new protocol to capture this. + // Arguments.of("stripe_messages.txt", "stripe_schema.json") + ) + } + + open class CatalogMessageTestConfigPair(val catalogFile: String, val messageFile: String) { + fun getCatalogFileVersion(protocolVersion: ProtocolVersion): String? 
{ + return ArgumentProviderUtil.prefixFileNameByVersion(catalogFile, protocolVersion) + } + + fun getMessageFileVersion(protocolVersion: ProtocolVersion): String { + return ArgumentProviderUtil.prefixFileNameByVersion(messageFile, protocolVersion) + } + } + + companion object { + val EXCHANGE_RATE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("exchange_rate_catalog.json", "exchange_rate_messages.txt") + val EDGE_CASE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("edge_case_catalog.json", "edge_case_messages.txt") + val NAMESPACE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("namespace_catalog.json", "namespace_messages.txt") + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt new file mode 100644 index 000000000000..ce2e18ab7807 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination.argproviders + +import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion +import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import java.util.stream.Stream +import org.junit.jupiter.api.extension.ExtensionContext +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.ArgumentsProvider +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +class DataTypeTestArgumentProvider : ArgumentsProvider { + private lateinit var protocolVersion: ProtocolVersion + + @Throws(Exception::class) + override fun provideArguments(context: ExtensionContext): Stream { + protocolVersion = ArgumentProviderUtil.getProtocolVersion(context) + return Stream.of( + getArguments(BASIC_TEST), + getArguments(ARRAY_TEST), + getArguments(OBJECT_TEST), + getArguments(OBJECT_WITH_ARRAY_TEST) + ) + } + + private fun getArguments(testConfig: CatalogMessageTestConfigWithCompatibility): Arguments { + return Arguments.of( + testConfig.getMessageFileVersion(protocolVersion), + testConfig.getCatalogFileVersion(protocolVersion), + testConfig.testCompatibility + ) + } + + @JvmRecord + data class TestCompatibility( + val requireBasicCompatibility: Boolean, + val requireArrayCompatibility: Boolean, + val requireObjectCompatibility: Boolean + ) { + fun isTestCompatible( + supportBasicDataTypeTest: Boolean, + supportArrayDataTypeTest: Boolean, + supportObjectDataTypeTest: Boolean + ): Boolean { + LOGGER.info("---- Data type test compatibility ----") + LOGGER.info("| Data type test | Require | Support |") + LOGGER.info( + "| Basic test | {} | {} |", + (if (requireBasicCompatibility) "true " else "false"), + (if (supportBasicDataTypeTest) "true " else "false") + ) + LOGGER.info( + "| Array test | {} | {} |", + (if (requireArrayCompatibility) "true " else "false"), + (if (supportArrayDataTypeTest) "true " else "false") + ) + LOGGER.info( + "| Object test | {} | {} |", + (if (requireObjectCompatibility) "true " else "false"), + (if (supportObjectDataTypeTest) "true " else "false") + ) + 
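+            // Example: OBJECT_TEST is declared with TestCompatibility(true, false, true), so a
+            // destination reporting supportObjectDataTypeTest = false skips that case below.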
LOGGER.info("--------------------------------------") + + if (requireBasicCompatibility && !supportBasicDataTypeTest) { + LOGGER.warn( + "The destination doesn't support required Basic data type test. The test is skipped!" + ) + return false + } + if (requireArrayCompatibility && !supportArrayDataTypeTest) { + LOGGER.warn( + "The destination doesn't support required Array data type test. The test is skipped!" + ) + return false + } + if (requireObjectCompatibility && !supportObjectDataTypeTest) { + LOGGER.warn( + "The destination doesn't support required Object data type test. The test is skipped!" + ) + return false + } + + return true + } + } + + class CatalogMessageTestConfigWithCompatibility( + catalogFile: String, + messageFile: String, + val testCompatibility: TestCompatibility + ) : DataArgumentsProvider.CatalogMessageTestConfigPair(catalogFile, messageFile) + + companion object { + private val LOGGER: Logger = + LoggerFactory.getLogger(DataTypeTestArgumentProvider::class.java) + + const val INTEGER_TYPE_CATALOG: String = "data_type_integer_type_test_catalog.json" + const val NUMBER_TYPE_CATALOG: String = "data_type_number_type_test_catalog.json" + const val NAN_TYPE_MESSAGE: String = "nan_type_test_message.txt" + const val INFINITY_TYPE_MESSAGE: String = "nan_type_test_message.txt" + val BASIC_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_basic_test_catalog.json", + "data_type_basic_test_messages.txt", + TestCompatibility(true, false, false) + ) + val ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_array_test_catalog.json", + "data_type_array_test_messages.txt", + TestCompatibility(true, true, false) + ) + val OBJECT_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_object_test_catalog.json", + "data_type_object_test_messages.txt", + TestCompatibility(true, false, true) + ) + val OBJECT_WITH_ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_array_object_test_catalog.json", + "data_type_array_object_test_messages.txt", + TestCompatibility(true, true, true) + ) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt new file mode 100644 index 000000000000..1d9bdb35c52a --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.argproviders
+
+import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion
+import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil
+import java.util.stream.Stream
+import org.junit.jupiter.api.extension.ExtensionContext
+import org.junit.jupiter.params.provider.Arguments
+import org.junit.jupiter.params.provider.ArgumentsProvider
+
+class NumberDataTypeTestArgumentProvider : ArgumentsProvider {
+    private lateinit var protocolVersion: ProtocolVersion
+
+    @Throws(Exception::class)
+    override fun provideArguments(context: ExtensionContext): Stream<out Arguments> {
+        protocolVersion = ArgumentProviderUtil.getProtocolVersion(context)
+        return Stream.of(
+            getArguments(NUMBER_DATA_TYPE_TEST_CATALOG, NUMBER_DATA_TYPE_TEST_MESSAGES),
+            getArguments(NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG, NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES)
+        )
+    }
+
+    private fun getArguments(catalogFile: String, messageFile: String): Arguments {
+        return Arguments.of(
+            ArgumentProviderUtil.prefixFileNameByVersion(catalogFile, protocolVersion),
+            ArgumentProviderUtil.prefixFileNameByVersion(messageFile, protocolVersion)
+        )
+    }
+
+    companion object {
+        const val NUMBER_DATA_TYPE_TEST_CATALOG: String = "number_data_type_test_catalog.json"
+        const val NUMBER_DATA_TYPE_TEST_MESSAGES: String = "number_data_type_test_messages.txt"
+        const val NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG: String =
+            "number_data_type_array_test_catalog.json"
+        const val NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES: String =
+            "number_data_type_array_test_messages.txt"
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt
new file mode 100644
index 000000000000..d611e09dae7a
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.argproviders.util
+
+import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion
+import org.junit.jupiter.api.extension.ExtensionContext
+
+object ArgumentProviderUtil {
+    private const val PROTOCOL_VERSION_METHOD_NAME = "getProtocolVersion"
+
+    /**
+     * This method uses
+     * [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.getPrefix] to prefix
+     * the file name.
+     *
+     * Example:
+     *
+     * filename.json -> v0/filename.json
+     *
+     * @param fileName the original file name
+     * @param protocolVersion supported protocol version
+     * @return filename with protocol version prefix
+     */
+    fun prefixFileNameByVersion(fileName: String?, protocolVersion: ProtocolVersion): String {
+        return String.format("%s/%s", protocolVersion.prefix, fileName)
+    }
+
+    /**
+     * This method uses reflection to get the protocol version method from the provided test
+     * context.
+     *
+     * NOTE: the getProtocolVersion method must be public.
+     *
+     * @param context the context in which the current test is being executed.
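+     * (Illustrative: a concrete acceptance test typically declares
+     * `override fun getProtocolVersion() = ProtocolVersion.V1`, which this lookup resolves
+     * reflectively on a fresh instance of the test class.)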
+     * @return supported protocol version
+     */
+    @Throws(Exception::class)
+    fun getProtocolVersion(context: ExtensionContext): ProtocolVersion {
+        val c = context.requiredTestClass
+        // NOTE: Method should be public
+        val m = c.getMethod(PROTOCOL_VERSION_METHOD_NAME)
+        return m.invoke(c.getDeclaredConstructor().newInstance()) as ProtocolVersion
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt
new file mode 100644
index 000000000000..14ed4337b457
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.comparator
+
+import com.fasterxml.jackson.databind.JsonNode
+import java.time.ZoneOffset
+import java.time.ZonedDateTime
+import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
+import org.junit.jupiter.api.Assertions
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+open class AdvancedTestDataComparator : TestDataComparator {
+    override fun assertSameData(expected: List<JsonNode>, actual: List<JsonNode>) {
+        LOGGER.info("Expected data {}", expected)
+        LOGGER.info("Actual data {}", actual)
+        Assertions.assertEquals(expected.size, actual.size)
+        val expectedIterator = expected.iterator()
+        val actualIterator = actual.iterator()
+        while (expectedIterator.hasNext() && actualIterator.hasNext()) {
+            compareObjects(expectedIterator.next(), actualIterator.next())
+        }
+    }
+
+    protected fun resolveIdentifier(identifier: String?): List<String?> {
+        return java.util.List.of(identifier)
+    }
+
+    protected fun compareObjects(expectedObject: JsonNode, actualObject: JsonNode) {
+        if (!areBothEmpty(expectedObject, actualObject)) {
+            LOGGER.info("Expected Object : {}", expectedObject)
+            LOGGER.info("Actual Object : {}", actualObject)
+            val expectedDataIterator = expectedObject.fields()
+            while (expectedDataIterator.hasNext()) {
+                val expectedEntry = expectedDataIterator.next()
+                val expectedValue = expectedEntry.value
+                val key = expectedEntry.key
+                val actualValue =
+                    ComparatorUtils.getActualValueByExpectedKey(key, actualObject) {
+                        identifier: String? ->
+                        this.resolveIdentifier(identifier)
+                    }
+                LOGGER.info("For {} Expected {} vs Actual {}", key, expectedValue, actualValue)
+                assertSameValue(expectedValue, actualValue)
+            }
+        } else {
+            LOGGER.info("Both rows are empty.")
+        }
+    }
+
+    private fun isJsonNodeEmpty(jsonNode: JsonNode): Boolean {
+        return jsonNode.isEmpty ||
+            (jsonNode.size() == 1 && jsonNode.iterator().next().asText().isEmpty())
+    }
+
+    private fun areBothEmpty(expectedData: JsonNode, actualData: JsonNode): Boolean {
+        return isJsonNodeEmpty(expectedData) && isJsonNodeEmpty(actualData)
+    }
+
+    // Allows subclasses to implement custom comparison asserts
+    protected fun assertSameValue(expectedValue: JsonNode, actualValue: JsonNode?)
{ + LOGGER.info("assertSameValue : {} vs {}", expectedValue, actualValue) + + Assertions.assertTrue( + compareJsonNodes(expectedValue, actualValue), + "Expected value $expectedValue vs Actual value $actualValue" + ) + } + + protected fun compareJsonNodes(expectedValue: JsonNode?, actualValue: JsonNode?): Boolean { + if (expectedValue == null || actualValue == null) { + return expectedValue == null && actualValue == null + } else if (isNumeric(expectedValue.asText())) { + return compareNumericValues(expectedValue.asText(), actualValue.asText()) + } else if (expectedValue.isBoolean) { + return compareBooleanValues(expectedValue.asText(), actualValue.asText()) + } else if (isDateTimeWithTzValue(expectedValue.asText())) { + return compareDateTimeWithTzValues(expectedValue.asText(), actualValue.asText()) + } else if (isDateTimeValue(expectedValue.asText())) { + return compareDateTimeValues(expectedValue.asText(), actualValue.asText()) + } else if (isDateValue(expectedValue.asText())) { + return compareDateValues(expectedValue.asText(), actualValue.asText()) + } else if (isTimeWithTimezone(expectedValue.asText())) { + return compareTimeWithTimeZone(expectedValue.asText(), actualValue.asText()) + } else if (isTimeWithoutTimezone(expectedValue.asText())) { + return compareTimeWithoutTimeZone(expectedValue.asText(), actualValue.asText()) + } else if (expectedValue.isArray) { + return compareArrays(expectedValue, actualValue) + } else if (expectedValue.isObject) { + compareObjects(expectedValue, actualValue) + return true + } else { + LOGGER.warn("Default comparison method!") + return compareString(expectedValue, actualValue) + } + } + + protected open fun compareString(expectedValue: JsonNode, actualValue: JsonNode): Boolean { + return expectedValue.asText() == actualValue.asText() + } + + private fun isNumeric(value: String): Boolean { + return value.matches("-?\\d+(\\.\\d+)?".toRegex()) + } + + private fun getArrayList(jsonArray: JsonNode): MutableList { + val result: MutableList = ArrayList() + jsonArray.elements().forEachRemaining { e: JsonNode -> result.add(e) } + return result + } + + protected fun compareArrays(expectedArray: JsonNode, actualArray: JsonNode): Boolean { + val expectedList: List = getArrayList(expectedArray) + val actualList = getArrayList(actualArray) + + if (expectedList.size != actualList.size) { + return false + } else { + for (expectedNode in expectedList) { + val sameActualNode = + actualList + .stream() + .filter { actualNode: JsonNode? 
-> + compareJsonNodes(expectedNode, actualNode) + } + .findFirst() + if (sameActualNode.isPresent) { + actualList.remove(sameActualNode.get()) + } else { + return false + } + } + return true + } + } + + protected fun compareBooleanValues( + firstBooleanValue: String, + secondBooleanValue: String + ): Boolean { + return firstBooleanValue.toBoolean() == secondBooleanValue.toBoolean() + } + + protected fun compareNumericValues( + firstNumericValue: String, + secondNumericValue: String + ): Boolean { + val firstValue = firstNumericValue.toDouble() + val secondValue = secondNumericValue.toDouble() + + return firstValue == secondValue + } + + protected val airbyteDateTimeWithTzFormatter: DateTimeFormatter + get() = DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT) + + protected val airbyteDateTimeParsedWithTzFormatter: DateTimeFormatter + get() = DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_PARSED_FORMAT_TZ) + + protected fun isDateTimeWithTzValue(value: String): Boolean { + return !TEST_DATASET_IGNORE_LIST.contains(value) && + value.matches( + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$".toRegex() + ) + } + + protected open fun parseDestinationDateWithTz(destinationValue: String): ZonedDateTime { + return ZonedDateTime.parse( + destinationValue, + DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT) + ) + .withZoneSameInstant(ZoneOffset.UTC) + } + + protected fun compareDateTimeWithTzValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { + try { + val airbyteDate = + ZonedDateTime.parse(airbyteMessageValue, airbyteDateTimeWithTzFormatter) + .withZoneSameInstant(ZoneOffset.UTC) + val destinationDate = parseDestinationDateWithTz(destinationValue) + return airbyteDate == destinationDate + } catch (e: DateTimeParseException) { + LOGGER.warn( + "Fail to convert values to ZonedDateTime. Try to compare as text. Airbyte value({}), Destination value ({}). 
Exception: {}", + airbyteMessageValue, + destinationValue, + e + ) + return compareTextValues(airbyteMessageValue, destinationValue) + } + } + + protected fun isDateTimeValue(value: String): Boolean { + return value.matches( + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$".toRegex() + ) + } + + protected fun isTimeWithTimezone(value: String): Boolean { + return value.matches("^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})$".toRegex()) + } + + protected fun isTimeWithoutTimezone(value: String): Boolean { + return value.matches("^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$".toRegex()) + } + + protected open fun compareDateTimeValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { + return compareTextValues(airbyteMessageValue, destinationValue) + } + + protected fun isDateValue(value: String): Boolean { + return value.matches("^\\d{4}-\\d{2}-\\d{2}( BC)?$".toRegex()) + } + + protected open fun compareDateValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { + return compareTextValues(airbyteMessageValue, destinationValue) + } + + protected open fun compareTimeWithoutTimeZone( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { + return compareTextValues(airbyteMessageValue, destinationValue) + } + + protected fun compareTimeWithTimeZone( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { + return compareTextValues(airbyteMessageValue, destinationValue) + } + + protected fun compareTextValues(firstValue: String, secondValue: String): Boolean { + return firstValue == secondValue + } + + companion object { + private val LOGGER: Logger = LoggerFactory.getLogger(AdvancedTestDataComparator::class.java) + + const val AIRBYTE_DATE_FORMAT: String = "yyyy-MM-dd" + const val AIRBYTE_DATETIME_FORMAT: String = "yyyy-MM-dd'T'HH:mm:ss" + const val AIRBYTE_DATETIME_PARSED_FORMAT: String = "yyyy-MM-dd HH:mm:ss.S" + const val AIRBYTE_DATETIME_PARSED_FORMAT_TZ: String = "yyyy-MM-dd HH:mm:ss XXX" + const val AIRBYTE_DATETIME_WITH_TZ_FORMAT: String = + ("[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d]" + + "[[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X][' '][G]]]") + + // TODO revisit dataset which used date as string: exchange_rate_catalog.json + // tried to change it to date time type but some connectors failed to store it e.i. + // bigquery-denormalized + private val TEST_DATASET_IGNORE_LIST = + setOf( + "2020-08-29T00:00:00Z", + "2020-08-30T00:00:00Z", + "2020-08-31T00:00:00Z", + "2020-09-01T00:00:00Z", + "2020-09-15T16:58:52.000000Z", + "2020-03-31T00:00:00Z" + ) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt new file mode 100644 index 000000000000..e18d2ea54508 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ +package io.airbyte.cdk.integrations.standardtest.destination.comparator + +import com.fasterxml.jackson.databind.JsonNode +import java.util.function.Function +import org.junit.jupiter.api.Assertions +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +class BasicTestDataComparator(private val nameResolver: Function>) : + TestDataComparator { + override fun assertSameData(expected: List, actual: List) { + LOGGER.info("Expected data {}", expected) + LOGGER.info("Actual data {}", actual) + Assertions.assertEquals(expected.size, actual.size) + val expectedIterator = expected.iterator() + val actualIterator = actual.iterator() + while (expectedIterator.hasNext() && actualIterator.hasNext()) { + val expectedData = expectedIterator.next() + val actualData = actualIterator.next() + val expectedDataIterator = expectedData.fields() + LOGGER.info("Expected row {}", expectedData) + LOGGER.info("Actual row {}", actualData) + Assertions.assertEquals(expectedData.size(), actualData.size(), "Unequal row size") + while (expectedDataIterator.hasNext()) { + val expectedEntry = expectedDataIterator.next() + val expectedValue = expectedEntry.value + val key = expectedEntry.key + val actualValue = + ComparatorUtils.getActualValueByExpectedKey(key, actualData, nameResolver) + LOGGER.info("For {} Expected {} vs Actual {}", key, expectedValue, actualValue) + assertSameValue(expectedValue, actualValue) + } + } + } + + // Allows subclasses to implement custom comparison asserts + protected fun assertSameValue(expectedValue: JsonNode?, actualValue: JsonNode?) { + Assertions.assertEquals(expectedValue, actualValue) + } + + companion object { + private val LOGGER: Logger = LoggerFactory.getLogger(BasicTestDataComparator::class.java) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.kt new file mode 100644 index 000000000000..b7252e9d94a8 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/ComparatorUtils.kt @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ +package io.airbyte.cdk.integrations.standardtest.destination.comparator + +import com.fasterxml.jackson.databind.JsonNode +import java.util.function.Function + +object ComparatorUtils { + fun getActualValueByExpectedKey( + expectedKey: String?, + actualJsonNode: JsonNode, + nameResolver: Function> + ): JsonNode? { + for (actualKey in nameResolver.apply(expectedKey)) { + if (actualJsonNode.has(actualKey)) { + return actualJsonNode[actualKey] + } + } + return null + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.kt new file mode 100644 index 000000000000..601f9435ed08 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/TestDataComparator.kt @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.comparator
+
+import com.fasterxml.jackson.databind.JsonNode
+
+interface TestDataComparator {
+    fun assertSameData(expected: List<JsonNode>, actual: List<JsonNode>)
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt
new file mode 100644
index 000000000000..a79b1b246d70
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping
+
+import com.fasterxml.jackson.databind.JsonNode
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_ID
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_RAW_ID
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT
+import io.airbyte.cdk.integrations.base.JavaBaseConstants.LEGACY_RAW_TABLE_COLUMNS
+import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator
+import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType
+import io.airbyte.integrations.base.destination.typing_deduping.BaseSqlGeneratorIntegrationTest
+import io.airbyte.integrations.base.destination.typing_deduping.StreamId
+import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState
+import java.sql.SQLException
+import java.util.*
+import org.jooq.*
+import org.jooq.conf.ParamType
+import org.jooq.impl.DSL
+import org.jooq.impl.SQLDataType
+
+abstract class JdbcSqlGeneratorIntegrationTest<DestinationState : MinimumDestinationState> :
+    BaseSqlGeneratorIntegrationTest<DestinationState>() {
+    protected abstract val database: JdbcDatabase
+        get
+
+    protected abstract val structType: DataType<*>
+        get
+
+    private val timestampWithTimeZoneType: DataType<*>
+        // TODO - can we move this class into db_destinations/testFixtures?
+        get() = sqlGenerator!!.toDialectType(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE)
+
+    abstract override val sqlGenerator: JdbcSqlGenerator?
+        get
+
+    protected abstract val sqlDialect: SQLDialect?
+        get
+
+    private val dslContext: DSLContext
+        get() = DSL.using(sqlDialect)
+
+    /**
+     * Many destinations require special handling to create JSON values. For example, redshift
+     * requires you to invoke JSON_PARSE('{...}'), and postgres requires you to CAST('{...}' AS
+     * JSONB). This method allows subclasses to implement that logic.
+     */
+    protected abstract fun toJsonValue(valueAsString: String?): Field<*>?
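+    // A minimal sketch (an assumption, not part of this change) of what a Postgres-flavored
+    // subclass might return, using plain jOOQ, per the CAST('{...}' AS JSONB) note above:
+    //   override fun toJsonValue(valueAsString: String?): Field<*>? =
+    //       DSL.cast(DSL.inline(valueAsString), SQLDataType.JSONB)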
+ + @Throws(SQLException::class) + private fun insertRecords( + tableName: Name, + columnNames: List, + records: List?, + vararg columnsToParseJson: String + ) { + var insert = + dslContext.insertInto( + DSL.table(tableName), + columnNames + .stream() + .map { columnName: String? -> DSL.field(DSL.quotedName(columnName)) } + .toList() + ) + for (record in records!!) { + insert = + insert.values( + columnNames + .stream() + .map { fieldName: String -> + // Convert this field to a string. Pretty naive implementation. + val column = record[fieldName] + val columnAsString = + if (column == null) { + null + } else if (column.isTextual) { + column.asText() + } else { + column.toString() + } + if (Arrays.asList(*columnsToParseJson).contains(fieldName)) { + return@map toJsonValue(columnAsString) + } else { + return@map DSL.`val`(columnAsString) + } + } + .toList() + ) + } + database.execute(insert.getSQL(ParamType.INLINED)) + } + + @Throws(Exception::class) + override fun createNamespace(namespace: String?) { + database.execute(dslContext.createSchemaIfNotExists(namespace).getSQL(ParamType.INLINED)) + } + + @Throws(Exception::class) + override fun createRawTable(streamId: StreamId?) { + database.execute( + dslContext + .createTable(DSL.name(streamId!!.rawNamespace, streamId.rawName)) + .column(COLUMN_NAME_AB_RAW_ID, SQLDataType.VARCHAR(36).nullable(false)) + .column(COLUMN_NAME_AB_EXTRACTED_AT, timestampWithTimeZoneType.nullable(false)) + .column(COLUMN_NAME_AB_LOADED_AT, timestampWithTimeZoneType) + .column(COLUMN_NAME_DATA, structType.nullable(false)) + .column(COLUMN_NAME_AB_META, structType.nullable(true)) + .getSQL(ParamType.INLINED) + ) + } + + @Throws(Exception::class) + override fun createV1RawTable(v1RawTable: StreamId?) { + database.execute( + dslContext + .createTable(DSL.name(v1RawTable!!.rawNamespace, v1RawTable.rawName)) + .column(COLUMN_NAME_AB_ID, SQLDataType.VARCHAR(36).nullable(false)) + .column(COLUMN_NAME_EMITTED_AT, timestampWithTimeZoneType.nullable(false)) + .column(COLUMN_NAME_DATA, structType.nullable(false)) + .getSQL(ParamType.INLINED) + ) + } + + @Throws(Exception::class) + public override fun insertRawTableRecords(streamId: StreamId?, records: List?) { + insertRecords( + DSL.name(streamId!!.rawNamespace, streamId.rawName), + JavaBaseConstants.V2_RAW_TABLE_COLUMN_NAMES, + records, + COLUMN_NAME_DATA, + COLUMN_NAME_AB_META + ) + } + + @Throws(Exception::class) + override fun insertV1RawTableRecords(streamId: StreamId?, records: List?) { + insertRecords( + DSL.name(streamId!!.rawNamespace, streamId.rawName), + LEGACY_RAW_TABLE_COLUMNS, + records, + COLUMN_NAME_DATA + ) + } + + @Throws(Exception::class) + override fun insertFinalTableRecords( + includeCdcDeletedAt: Boolean, + streamId: StreamId?, + suffix: String?, + records: List? 
+    ) {
+        val columnNames =
+            if (includeCdcDeletedAt) FINAL_TABLE_COLUMN_NAMES_CDC else FINAL_TABLE_COLUMN_NAMES
+        insertRecords(
+            DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix),
+            columnNames,
+            records,
+            COLUMN_NAME_AB_META,
+            "struct",
+            "array",
+            "unknown"
+        )
+    }
+
+    @Throws(Exception::class)
+    override fun dumpRawTableRecords(streamId: StreamId?): List<JsonNode> {
+        return database.queryJsons(
+            dslContext
+                .selectFrom(DSL.name(streamId!!.rawNamespace, streamId.rawName))
+                .getSQL(ParamType.INLINED)
+        )
+    }
+
+    @Throws(Exception::class)
+    override fun dumpFinalTableRecords(streamId: StreamId?, suffix: String?): List<JsonNode> {
+        return database.queryJsons(
+            dslContext
+                .selectFrom(DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix))
+                .getSQL(ParamType.INLINED)
+        )
+    }
+
+    @Throws(Exception::class)
+    override fun teardownNamespace(namespace: String?) {
+        database.execute(dslContext.dropSchema(namespace).cascade().getSQL(ParamType.INLINED))
+    }
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt
new file mode 100644
index 000000000000..df73b7dac745
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.ObjectNode
+import io.airbyte.cdk.db.JdbcCompatibleSourceOperations
+import io.airbyte.cdk.db.factory.DataSourceFactory.close
+import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.db.jdbc.JdbcUtils
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest
+import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName
+import javax.sql.DataSource
+import org.jooq.impl.DSL
+
+/**
+ * This class is largely the same as
+ * [io.airbyte.integrations.destination.snowflake.typing_deduping.AbstractSnowflakeTypingDedupingTest]
+ * . But (a) it uses jOOQ to construct the SQL statements, and (b) it doesn't need to upcase
+ * anything. At some point we might want to do a refactor to combine them.
+ */
+abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() {
+    private var database: JdbcDatabase? = null
+    private var dataSource: DataSource? = null
+
+    protected abstract val baseConfig: ObjectNode
+        /**
+         * Get the config as declared in GSM (or directly from the testcontainer). This class will
+         * do further modification to the config to ensure test isolation.
+         */
+        get
+
+    protected abstract fun getDataSource(config: JsonNode?): DataSource?
+
+    protected val sourceOperations: JdbcCompatibleSourceOperations<*>
+        /**
+         * Subclasses may need to return a custom source operations if the default one does not
+         * handle vendor-specific types correctly. For example, you most likely need to override
+         * this method to deserialize JSON columns to JsonNode.
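+         * (Hypothetical example: a destination whose driver returns JSON columns as plain text
+         * could return a source-operations implementation that runs Jsons.deserialize on those
+         * columns while building each row.)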
+ */ + get() = JdbcUtils.defaultSourceOperations + + protected val rawSchema: String + /** + * Subclasses using a config with a nonstandard raw table schema should override this + * method. + */ + get() = JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE + + /** + * Subclasses using a config where the default schema is not in the `schema` key should override + * this method and [.setDefaultSchema]. + */ + protected fun getDefaultSchema(config: JsonNode): String { + return config["schema"].asText() + } + + /** + * Subclasses using a config where the default schema is not in the `schema` key should override + * this method and [.getDefaultSchema]. + */ + protected fun setDefaultSchema(config: JsonNode, schema: String?) { + (config as ObjectNode).put("schema", schema) + } + + override fun generateConfig(): JsonNode? { + val config: JsonNode = baseConfig + setDefaultSchema(config, "typing_deduping_default_schema$uniqueSuffix") + dataSource = getDataSource(config) + database = DefaultJdbcDatabase(dataSource!!, sourceOperations) + return config + } + + @Throws(Exception::class) + override fun dumpRawTableRecords( + streamNamespace: String?, + streamName: String? + ): List { + var streamNamespace = streamNamespace + if (streamNamespace == null) { + streamNamespace = getDefaultSchema(config!!) + } + val tableName = concatenateRawTableName(streamNamespace, streamName!!) + val schema = rawSchema + return database!!.queryJsons(DSL.selectFrom(DSL.name(schema, tableName)).sql) + } + + @Throws(Exception::class) + override fun dumpFinalTableRecords( + streamNamespace: String?, + streamName: String? + ): List { + var streamNamespace = streamNamespace + if (streamNamespace == null) { + streamNamespace = getDefaultSchema(config!!) + } + return database!!.queryJsons(DSL.selectFrom(DSL.name(streamNamespace, streamName)).sql) + } + + @Throws(Exception::class) + override fun teardownStreamAndNamespace(streamNamespace: String?, streamName: String?) { + var streamNamespace = streamNamespace + if (streamNamespace == null) { + streamNamespace = getDefaultSchema(config!!) + } + database!!.execute( + DSL.dropTableIfExists( + DSL.name(rawSchema, concatenateRawTableName(streamNamespace, streamName!!)) + ) + .sql + ) + database!!.execute(DSL.dropSchemaIfExists(DSL.name(streamNamespace)).cascade().sql) + } + + @Throws(Exception::class) + override fun globalTeardown() { + close(dataSource) + } +} diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt index 249e5ce09a28..b97b46d2a57d 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt @@ -71,7 +71,7 @@ abstract class BaseGcsDestination : BaseConnector(), Destination { override fun getConsumer( config: JsonNode, configuredCatalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? 
{ val gcsConfig: GcsDestinationConfig = GcsDestinationConfig.Companion.getGcsDestinationConfig(config) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt index b6f23623eff6..c2ebd0eac01a 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt @@ -181,7 +181,7 @@ abstract class GcsStreamCopier( } @Throws(Exception::class) - override fun generateMergeStatement(destTableName: String?): String? { + override fun generateMergeStatement(destTableName: String?): String { LOGGER.info( "Preparing to merge tmp table {} to dest table: {}, schema: {}, in destination.", tmpTableName, diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt index aebf1d02bf07..40c4a5b4c48d 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt @@ -27,9 +27,7 @@ import org.junit.jupiter.params.provider.ArgumentsSource abstract class GcsAvroParquetDestinationAcceptanceTest(s3Format: S3Format) : GcsDestinationAcceptanceTest(s3Format) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 @ParameterizedTest @ArgumentsSource(NumberDataTypeTestArgumentProvider::class) @@ -38,8 +36,8 @@ abstract class GcsAvroParquetDestinationAcceptanceTest(s3Format: S3Format) : val catalog = readCatalogFromFile(catalogFileName) val messages = readMessagesFromFile(messagesFileName) - val config = getConfig() - val defaultSchema = getDefaultSchema(config!!) 
+ val config = this.getConfig() + val defaultSchema = getDefaultSchema(config) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt index 0ca10fc74aa9..e10cbe9a46a5 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt @@ -30,18 +30,16 @@ abstract class GcsBaseAvroDestinationAcceptanceTest : }""" ) - override fun getTestDataComparator(): TestDataComparator { - return GcsAvroTestDataComparator() - } + override fun getTestDataComparator(): TestDataComparator = GcsAvroTestDataComparator() @Throws(Exception::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -92,7 +90,5 @@ abstract class GcsBaseAvroDestinationAcceptanceTest : return resultDataTypes } - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 } diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt index 22ac5401041e..8c86a2dc2e73 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt @@ -22,9 +22,7 @@ import org.apache.commons.csv.CSVRecord import org.apache.commons.csv.QuoteMode abstract class GcsBaseCsvDestinationAcceptanceTest : GcsDestinationAcceptanceTest(S3Format.CSV) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? 
get() = @@ -41,9 +39,9 @@ abstract class GcsBaseCsvDestinationAcceptanceTest : GcsDestinationAcceptanceTes @Throws(IOException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { val objectSummaries = getAllSyncedObjects(streamName, namespace) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt index 1c436609cf33..6b9347abd1f3 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt @@ -16,9 +16,7 @@ import java.util.Map import java.util.zip.GZIPInputStream abstract class GcsBaseCsvGzipDestinationAcceptanceTest : GcsBaseCsvDestinationAcceptanceTest() { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? get() = // config without compression defaults to GZIP diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt index eb87cf87a238..b1d8d1d165e5 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt @@ -20,9 +20,7 @@ import kotlin.collections.MutableList abstract class GcsBaseJsonlDestinationAcceptanceTest : GcsDestinationAcceptanceTest(S3Format.JSONL) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? 
get() = @@ -37,9 +35,9 @@ abstract class GcsBaseJsonlDestinationAcceptanceTest : @Throws(IOException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { val objectSummaries = getAllSyncedObjects(streamName, namespace) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt index 8c57e6926010..746e37e32617 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt @@ -15,9 +15,7 @@ import java.util.Map import java.util.zip.GZIPInputStream abstract class GcsBaseJsonlGzipDestinationAcceptanceTest : GcsBaseJsonlDestinationAcceptanceTest() { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? get() = // config without compression defaults to GZIP diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt index f5849f7e6b81..36aa8f28a489 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt @@ -26,26 +26,22 @@ import org.apache.parquet.hadoop.ParquetReader abstract class GcsBaseParquetDestinationAcceptanceTest : GcsAvroParquetDestinationAcceptanceTest(S3Format.PARQUET) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? 
get() = Jsons.jsonNode(java.util.Map.of("format_type", "Parquet", "compression_codec", "GZIP")) - override fun getTestDataComparator(): TestDataComparator { - return GcsAvroTestDataComparator() - } + override fun getTestDataComparator(): TestDataComparator = GcsAvroTestDataComparator() @Throws(IOException::class, URISyntaxException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -87,7 +83,7 @@ abstract class GcsBaseParquetDestinationAcceptanceTest : val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key) val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key)) val path = Path(uri) - val hadoopConfig = getHadoopConfig(config!!) + val hadoopConfig = getHadoopConfig(config) ParquetReader.builder(AvroReadSupport(), path) .withConf(hadoopConfig) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt index 2d71a152df19..d4364f0f4c14 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt @@ -58,7 +58,7 @@ protected constructor( override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? 
{ val s3Config = configFactory.getS3DestinationConfig(config, storageProvider()) return S3ConsumerFactory() diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt index 88af39e8611b..c12a5ba57f4d 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt @@ -28,7 +28,7 @@ import org.slf4j.LoggerFactory class S3ConsumerFactory { fun create( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, storageOperations: BlobStorageOperations, namingResolver: NamingConventionTransformer, onCreateBuffer: BufferCreateFunction, diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt index 27b1316f3044..6615a4563968 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt @@ -30,9 +30,9 @@ internal class AsyncFlush( private val stagingOperations: StagingOperations?, private val database: JdbcDatabase?, private val catalog: ConfiguredAirbyteCatalog?, - private val typerDeduperValve: TypeAndDedupeOperationValve?, - private val typerDeduper: - TyperDeduper?, // In general, this size is chosen to improve the performance of lower memory + private val typerDeduperValve: TypeAndDedupeOperationValve, + private val typerDeduper: TyperDeduper, + // In general, this size is chosen to improve the performance of lower memory // connectors. With 1 Gi // of // resource the connector will usually at most fill up around 150 MB in a single queue. 
By @@ -46,7 +46,7 @@ internal class AsyncFlush( streamDescToWriteConfig @Throws(Exception::class) - override fun flush(decs: StreamDescriptor, stream: Stream) { + override fun flush(decs: StreamDescriptor, stream: Stream) { val writer: CsvSerializedBuffer try { writer = @@ -91,16 +91,15 @@ internal class AsyncFlush( } val writeConfig: WriteConfig = streamDescToWriteConfig.getValue(decs) - val schemaName: String = writeConfig.getOutputSchemaName() - val stageName = - stagingOperations!!.getStageName(schemaName, writeConfig.getOutputTableName()) + val schemaName: String = writeConfig.outputSchemaName + val stageName = stagingOperations!!.getStageName(schemaName, writeConfig.outputTableName) val stagingPath = stagingOperations.getStagingPath( GeneralStagingFunctions.RANDOM_CONNECTION_ID, schemaName, - writeConfig.getStreamName(), - writeConfig.getOutputTableName(), - writeConfig.getWriteDatetime() + writeConfig.streamName, + writeConfig.outputTableName, + writeConfig.writeDatetime ) try { val stagedFile = @@ -116,11 +115,11 @@ internal class AsyncFlush( stageName, stagingPath, List.of(stagedFile), - writeConfig.getOutputTableName(), + writeConfig.outputTableName, schemaName, stagingOperations, - writeConfig.getNamespace(), - writeConfig.getStreamName(), + writeConfig.namespace, + writeConfig.streamName, typerDeduperValve, typerDeduper ) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt index 90f4199bbf87..09125c12b016 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt @@ -111,6 +111,10 @@ private constructor( } fun createAsync(): SerializedAirbyteMessageConsumer { + val typerDeduper = this.typerDeduper!! + val typerDeduperValve = this.typerDeduperValve!! + val stagingOperations = this.stagingOperations!! + val writeConfigs: List = createWriteConfigs( namingResolver, @@ -135,7 +139,7 @@ private constructor( return AsyncStreamConsumer( outputRecordCollector!!, GeneralStagingFunctions.onStartFunction( - database, + database!!, stagingOperations, writeConfigs, typerDeduper @@ -170,13 +174,13 @@ private constructor( fun builder( outputRecordCollector: Consumer, database: JdbcDatabase?, - stagingOperations: StagingOperations?, + stagingOperations: StagingOperations, namingResolver: NamingConventionTransformer?, config: JsonNode?, catalog: ConfiguredAirbyteCatalog, purgeStagingData: Boolean, - typerDeduperValve: TypeAndDedupeOperationValve?, - typerDeduper: TyperDeduper?, + typerDeduperValve: TypeAndDedupeOperationValve, + typerDeduper: TyperDeduper, parsedCatalog: ParsedCatalog?, defaultNamespace: String?, useDestinationsV2Columns: Boolean @@ -230,7 +234,7 @@ private constructor( .stream() .map( Function { config: WriteConfig -> - config.getNamespace() + "." + config.getStreamName() + config.namespace + "." 
+ config.streamName } ) .collect(Collectors.joining(", ")) @@ -241,9 +245,7 @@ private constructor( } private fun toStreamDescriptor(config: WriteConfig): StreamDescriptor { - return StreamDescriptor() - .withName(config.getStreamName()) - .withNamespace(config.getNamespace()) + return StreamDescriptor().withName(config.streamName).withNamespace(config.namespace) } /** diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt index 019550b1664d..3a5c2572dccc 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt @@ -31,7 +31,7 @@ abstract class S3AvroParquetDestinationAcceptanceTest protected constructor(s3Fo val catalog = readCatalogFromFile(catalogFileName) val messages = readMessagesFromFile(messagesFileName) - val config = getConfig() + val config = this.getConfig() val defaultSchema = getDefaultSchema(config!!) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt index f556d92e130c..676e9f98cf17 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt @@ -8,10 +8,7 @@ import java.time.* import java.time.format.DateTimeFormatter class S3AvroParquetTestDataComparator : AdvancedTestDataComparator() { - override fun compareDateValues( - airbyteMessageValue: String?, - destinationValue: String - ): Boolean { + override fun compareDateValues(airbyteMessageValue: String, destinationValue: String): Boolean { val destinationDate = LocalDate.ofEpochDay(destinationValue.toLong()) val expectedDate = LocalDate.parse( @@ -30,7 +27,7 @@ class S3AvroParquetTestDataComparator : AdvancedTestDataComparator() { } override fun compareDateTimeValues( - airbyteMessageValue: String?, + airbyteMessageValue: String, destinationValue: String ): Boolean { val format = DateTimeFormatter.ofPattern(AdvancedTestDataComparator.AIRBYTE_DATETIME_FORMAT) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt index 483008284132..f346315b7e66 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt +++ 
diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt
index 483008284132..f346315b7e66 100644
--- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt
@@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.JsonNode
 import com.fasterxml.jackson.databind.ObjectReader
 import io.airbyte.cdk.integrations.destination.s3.avro.AvroConstants
 import io.airbyte.cdk.integrations.destination.s3.util.AvroRecordHelper
+import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator
 import io.airbyte.commons.json.Jsons
 import java.util.*
 import org.apache.avro.Schema
@@ -38,11 +39,12 @@ abstract class S3BaseAvroDestinationAcceptanceTest protected constructor() :
     @Throws(Exception::class)
     override fun retrieveRecords(
         testEnv: TestDestinationEnv?,
-        streamName: String,
+        streamName: String?,
         namespace: String?,
         streamSchema: JsonNode
     ): List {
-        val nameUpdater = AvroRecordHelper.getFieldNameUpdater(streamName, namespace, streamSchema)
+        val nameUpdater =
+            AvroRecordHelper.getFieldNameUpdater(streamName!!, namespace, streamSchema)
         val objectSummaries = getAllSyncedObjects(streamName, namespace)

         val jsonRecords: MutableList = LinkedList()
@@ -69,7 +71,7 @@ abstract class S3BaseAvroDestinationAcceptanceTest protected constructor() :
         return jsonRecords
     }

-    override fun getTestDataComparator() = S3AvroParquetTestDataComparator()
+    override fun getTestDataComparator(): TestDataComparator = S3AvroParquetTestDataComparator()

     @Throws(Exception::class)
     override fun retrieveDataTypesFromPersistedFiles(
diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt
index 7d799c14c69e..381e75113e62 100644
--- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt
@@ -37,7 +37,7 @@ abstract class S3BaseJsonlDestinationAcceptanceTest protected constructor() :
         testEnv: TestDestinationEnv?,
         streamName: String?,
         namespace: String?,
-        streamSchema: JsonNode?
+        streamSchema: JsonNode
     ): List {
         val objectSummaries = getAllSyncedObjects(streamName, namespace)
         val jsonRecords: MutableList = LinkedList()
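The `getTestDataComparator(): TestDataComparator` changes in these fixtures are likely motivated by a small Kotlin pitfall: a single-expression override without an explicit return type infers the narrowest type (the concrete comparator), exposing the subtype in the class's public signature. Declaring the supertype keeps the override stable; a minimal sketch with stand-in types, not the Airbyte classes:

```kotlin
open class Comparator
class AvroComparator : Comparator()

abstract class BaseTest {
    open fun getComparator(): Comparator = Comparator()
}

class AvroTest : BaseTest() {
    // without ": Comparator" the inferred return type would be AvroComparator,
    // leaking the concrete subtype through this class's signature
    override fun getComparator(): Comparator = AvroComparator()
}
```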
diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt
index cb746b91d4c3..401e40a15061 100644
--- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt
@@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.ObjectReader
 import io.airbyte.cdk.integrations.destination.s3.avro.AvroConstants
 import io.airbyte.cdk.integrations.destination.s3.parquet.S3ParquetWriter
 import io.airbyte.cdk.integrations.destination.s3.util.AvroRecordHelper
+import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator
 import io.airbyte.commons.json.Jsons
 import java.io.IOException
 import java.net.URI
@@ -28,11 +29,12 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() :
     @Throws(IOException::class, URISyntaxException::class)
     override fun retrieveRecords(
         testEnv: TestDestinationEnv?,
-        streamName: String,
+        streamName: String?,
         namespace: String?,
         streamSchema: JsonNode
     ): List {
-        val nameUpdater = AvroRecordHelper.getFieldNameUpdater(streamName, namespace, streamSchema)
+        val nameUpdater =
+            AvroRecordHelper.getFieldNameUpdater(streamName!!, namespace, streamSchema)
         val objectSummaries = getAllSyncedObjects(streamName, namespace)

         val jsonRecords: MutableList = LinkedList()
@@ -41,7 +43,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() :
             val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key)
             val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key))
             val path = Path(uri)
-            val hadoopConfig = S3ParquetWriter.getHadoopConfig(config)
+            val hadoopConfig = S3ParquetWriter.getHadoopConfig(s3DestinationConfig)

             ParquetReader.builder(AvroReadSupport(), path)
                 .withConf(hadoopConfig)
@@ -62,7 +64,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() :
         return jsonRecords
     }

-    override fun getTestDataComparator() = S3AvroParquetTestDataComparator()
+    override fun getTestDataComparator(): TestDataComparator = S3AvroParquetTestDataComparator()

     @Throws(Exception::class)
     override fun retrieveDataTypesFromPersistedFiles(
@@ -76,7 +78,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() :
             val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key)
             val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key))
             val path = Path(uri)
-            val hadoopConfig = S3ParquetWriter.getHadoopConfig(config)
+            val hadoopConfig = S3ParquetWriter.getHadoopConfig(s3DestinationConfig)

             ParquetReader.builder(AvroReadSupport(), path)
                 .withConf(hadoopConfig)
diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt
index 6ab0884c23d1..b3e2b4cd51ad 100644
--- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt
@@ -13,6 +13,7 @@ import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
 import io.airbyte.cdk.integrations.destination.s3.util.S3NameTransformer
 import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest
 import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator
+import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator
 import io.airbyte.commons.io.IOs
 import io.airbyte.commons.jackson.MoreMappers
 import io.airbyte.commons.json.Jsons
@@ -39,9 +40,9 @@ abstract class S3DestinationAcceptanceTest
 protected constructor(protected val outputFormat: S3Format) : DestinationAcceptanceTest() {
     protected val secretFilePath: String = "secrets/config.json"
     protected var configJson: JsonNode? = null
-    protected lateinit var config: S3DestinationConfig
+    protected lateinit var s3DestinationConfig: S3DestinationConfig
     protected var s3Client: AmazonS3? = null
-    protected lateinit var nameTransformer: NamingConventionTransformer
+    protected lateinit var s3nameTransformer: NamingConventionTransformer
     protected var s3StorageOperations: S3StorageOperations? = null

     protected val baseConfigJson: JsonNode
@@ -54,6 +55,8 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta
         return null
     }

+    override fun getConfig(): JsonNode = configJson!!
+
     override fun getFailCheckConfig(): JsonNode {
         val baseJson = baseConfigJson
         val failCheckJson = Jsons.clone(baseJson)
@@ -68,20 +71,20 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta
         streamName: String?,
         namespace: String?
     ): List {
-        val namespaceStr = nameTransformer!!.getNamespace(namespace!!)
-        val streamNameStr = nameTransformer!!.getIdentifier(streamName!!)
+        val namespaceStr = s3nameTransformer!!.getNamespace(namespace!!)
+        val streamNameStr = s3nameTransformer!!.getIdentifier(streamName!!)
         val outputPrefix =
             s3StorageOperations!!.getBucketObjectPath(
                 namespaceStr,
                 streamNameStr,
                 DateTime.now(DateTimeZone.UTC),
-                config!!.pathFormat
+                s3DestinationConfig!!.pathFormat
             )
         // the child folder contains a non-deterministic epoch timestamp, so use the parent folder
         val parentFolder = outputPrefix.substring(0, outputPrefix.lastIndexOf("/") + 1)
         val objectSummaries =
             s3Client!!
-                .listObjects(config!!.bucketName, parentFolder)
+                .listObjects(s3DestinationConfig!!.bucketName, parentFolder)
                 .objectSummaries
                 .stream()
                 .filter { o: S3ObjectSummary -> o.key.contains("$streamNameStr/") }
@@ -105,7 +108,7 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta
      * * Construct the S3 destination config.
      * * Construct the S3 client.
      */
-    override fun setup(testEnv: TestDestinationEnv?, TEST_SCHEMAS: HashSet?) {
+    override fun setup(testEnv: TestDestinationEnv, TEST_SCHEMAS: HashSet) {
         val baseConfigJson = baseConfigJson
         // Set a random s3 bucket path for each integration test
         val configJson = Jsons.clone(baseConfigJson)
@@ -119,19 +122,27 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta
             .put("s3_bucket_path", testBucketPath)
             .set("format", formatConfig)
         this.configJson = configJson
-        this.config = S3DestinationConfig.getS3DestinationConfig(configJson, storageProvider())
-        LOGGER.info("Test full path: {}/{}", config.bucketName, config.bucketPath)
+        this.s3DestinationConfig =
+            S3DestinationConfig.getS3DestinationConfig(configJson, storageProvider())
+        LOGGER.info(
+            "Test full path: {}/{}",
+            s3DestinationConfig.bucketName,
+            s3DestinationConfig.bucketPath
+        )

-        this.s3Client = config.getS3Client()
-        this.nameTransformer = S3NameTransformer()
-        this.s3StorageOperations = S3StorageOperations(nameTransformer, s3Client!!, config)
+        this.s3Client = s3DestinationConfig.getS3Client()
+        this.s3nameTransformer = S3NameTransformer()
+        this.s3StorageOperations =
+            S3StorageOperations(s3nameTransformer, s3Client!!, s3DestinationConfig)
     }

     /** Remove all the S3 output from the tests. */
-    override fun tearDown(testEnv: TestDestinationEnv?) {
+    override fun tearDown(testEnv: TestDestinationEnv) {
         val keysToDelete: MutableList = LinkedList()
         val objects =
-            s3Client!!.listObjects(config!!.bucketName, config!!.bucketPath).objectSummaries
+            s3Client!!
+                .listObjects(s3DestinationConfig!!.bucketName, s3DestinationConfig!!.bucketPath)
+                .objectSummaries
         for (`object` in objects) {
             keysToDelete.add(DeleteObjectsRequest.KeyVersion(`object`.key))
         }
@@ -139,18 +150,18 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta
         if (keysToDelete.size > 0) {
             LOGGER.info(
                 "Tearing down test bucket path: {}/{}",
-                config!!.bucketName,
-                config!!.bucketPath
+                s3DestinationConfig!!.bucketName,
+                s3DestinationConfig!!.bucketPath
             )
             val result =
                 s3Client!!.deleteObjects(
-                    DeleteObjectsRequest(config!!.bucketName).withKeys(keysToDelete)
+                    DeleteObjectsRequest(s3DestinationConfig!!.bucketName).withKeys(keysToDelete)
                )
             LOGGER.info("Deleted {} file(s).", result.deletedObjects.size)
         }
     }

-    override fun getTestDataComparator() = AdvancedTestDataComparator()
+    override fun getTestDataComparator(): TestDataComparator = AdvancedTestDataComparator()

     override fun supportBasicDataTypeTest(): Boolean {
         return true
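The renamed `s3DestinationConfig`/`s3nameTransformer` fields stay `lateinit`, which is the pattern this fixture relies on: properties assigned in `setup()` and read without null checks afterwards. A reduced sketch of the tradeoff, using a stand-in type rather than the Airbyte classes:

```kotlin
class S3TestHarness {
    lateinit var s3DestinationConfig: String // assigned in setup(), non-null afterwards

    fun setup() {
        s3DestinationConfig = "bucket/path"
    }

    fun fullPath(): String {
        // reading before setup() throws UninitializedPropertyAccessException
        // (a clearer failure than a latent NPE from a nullable var)
        return "s3://$s3DestinationConfig"
    }
}
```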
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt
index 8b05e88e7a15..ae080b8162ed 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt
@@ -196,7 +196,7 @@ abstract class BaseDestinationV1V2Migrator : Destination
      */
     abstract fun schemaMatchesExpectation(
         existingTable: DialectTableDefinition,
-        columns: Collection?
+        columns: Collection
     ): Boolean

     /**
@@ -219,7 +219,7 @@ abstract class BaseDestinationV1V2Migrator : Destination
      * @param streamConfig the stream in question
      * @return the valid v1 name and namespace for the same stream
      */
-    abstract fun convertToV1RawName(streamConfig: StreamConfig?): NamespacedTableName
+    abstract fun convertToV1RawName(streamConfig: StreamConfig): NamespacedTableName

     companion object {
         protected val LOGGER: Logger =
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt
index 7820645b0596..c52bb15e4e7c 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt
@@ -122,7 +122,7 @@ constructor(
             val primaryKey =
                 stream.primaryKey
                     .stream()
-                    .map { key: List -> sqlGenerator.buildColumnId(key[0]) }
+                    .map { key: List -> sqlGenerator.buildColumnId(key[0]) }
                     .toList()

             require(stream.cursorField.size <= 1) { "Only top-level cursors are supported" }
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt
index 5373fe894cba..d1e33220c4b0 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt
@@ -377,7 +377,7 @@ class DefaultTyperDeduper(
     }

     @Throws(Exception::class)
-    override fun typeAndDedupe(streamSyncSummaries: Map) {
+    override fun typeAndDedupe(streamSyncSummaries: Map) {
         LOGGER.info("Typing and deduping all tables")
         val typeAndDedupeTasks: MutableSet>> = HashSet()
         parsedCatalog.streams
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt
index b86be15a360b..69802466706c 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt
@@ -4,7 +4,7 @@
 package io.airbyte.integrations.base.destination.typing_deduping

 interface DestinationHandler {
-    @Throws(Exception::class) fun execute(sql: Sql?)
+    @Throws(Exception::class) fun execute(sql: Sql)

     /**
      * Fetch the current state of the destination for the given streams. This method MUST create the
@@ -14,9 +14,9 @@
      */
     @Throws(Exception::class)
     fun gatherInitialState(
-        streamConfigs: List?
+        streamConfigs: List
     ): List>

     @Throws(Exception::class)
-    fun commitDestinationStates(destinationStates: Map?)
+    fun commitDestinationStates(destinationStates: Map)
 }
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt
index 4bc3b4e24f0b..60cfbca93809 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt
@@ -94,7 +94,7 @@ class NoOpTyperDeduperWithV1V2Migrations
-    override fun typeAndDedupe(streamSyncSummaries: Map) {
+    override fun typeAndDedupe(streamSyncSummaries: Map) {
         log.info("Skipping TypeAndDedupe final")
     }
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt
index 8c83383ae9e9..26df693cf76c 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt
@@ -48,7 +48,7 @@ class NoopTyperDeduper : TyperDeduper {

     override fun commitFinalTables() {}

-    override fun typeAndDedupe(streamSyncSummaries: Map) {}
+    override fun typeAndDedupe(streamSyncSummaries: Map) {}

     override fun cleanup() {}
 }
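The running theme of these interface edits is moving null handling from implementations to call sites: once `execute` takes a non-null `Sql`, implementations can stop defending against `null`, and the compiler forces callers to resolve nullability up front. A minimal sketch of the shape, with stand-in types rather than the Airbyte API:

```kotlin
interface Handler {
    fun execute(sql: String) // non-null parameter: the contract, not a convention
}

class LoggingHandler : Handler {
    override fun execute(sql: String) {
        println("executing: $sql") // no null check needed inside implementations
    }
}

fun run(handler: Handler, maybeSql: String?) {
    // the caller decides what null means; passing it through won't compile
    handler.execute(maybeSql ?: return)
}
```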
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt
index 124fa99c91a0..cbf522367509 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt
@@ -7,13 +7,13 @@ import java.time.Instant
 import java.util.*

 interface SqlGenerator {
-    fun buildStreamId(namespace: String?, name: String?, rawNamespaceOverride: String?): StreamId
+    fun buildStreamId(namespace: String, name: String, rawNamespaceOverride: String): StreamId

-    fun buildColumnId(name: String?): ColumnId {
+    fun buildColumnId(name: String): ColumnId {
         return buildColumnId(name, "")
     }

-    fun buildColumnId(name: String?, suffix: String?): ColumnId
+    fun buildColumnId(name: String, suffix: String?): ColumnId

     /**
      * Generate a SQL statement to create a fresh table to match the given stream.
@@ -26,7 +26,7 @@ interface SqlGenerator {
      * the table already exists. If you're passing a non-empty prefix, you likely want to set this
      * to true.
      */
-    fun createTable(stream: StreamConfig?, suffix: String?, force: Boolean): Sql
+    fun createTable(stream: StreamConfig, suffix: String, force: Boolean): Sql

     /**
      * Used to create either the airbyte_internal or final schemas if they don't exist
@@ -64,11 +64,11 @@ interface SqlGenerator {
      * which handles casting exceptions.
      */
     fun updateTable(
-        stream: StreamConfig?,
+        stream: StreamConfig,
         finalSuffix: String?,
         minRawTimestamp: Optional,
         useExpensiveSaferCasting: Boolean
-    ): Sql?
+    ): Sql

     /**
      * Drop the previous final table, and rename the new final table to match the old final table.
@@ -76,7 +76,7 @@ interface SqlGenerator {
     * This method may assume that the stream is an OVERWRITE stream, and that the final suffix is
     * non-empty. Callers are responsible for verifying those are true.
     */
-    fun overwriteFinalTable(stream: StreamId?, finalSuffix: String?): Sql?
+    fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql

     /**
      * Creates a sql query which will create a v2 raw table from the v1 raw table, then performs a
@@ -87,20 +87,20 @@ interface SqlGenerator {
      * @param tableName name of the v2 raw table
      * @return a string containing the necessary sql to migrate
      */
-    fun migrateFromV1toV2(streamId: StreamId?, namespace: String?, tableName: String?): Sql?
+    fun migrateFromV1toV2(streamId: StreamId, namespace: String?, tableName: String?): Sql

     /**
      * Typically we need to create a soft reset temporary table and clear loaded at values
      *
      * @return
      */
-    fun prepareTablesForSoftReset(stream: StreamConfig): Sql? {
+    fun prepareTablesForSoftReset(stream: StreamConfig): Sql {
         val createTempTable = createTable(stream, TypeAndDedupeTransaction.SOFT_RESET_SUFFIX, true)
         val clearLoadedAt = clearLoadedAt(stream.id)
         return Sql.Companion.concat(createTempTable, clearLoadedAt)
     }

-    fun clearLoadedAt(streamId: StreamId?): Sql
+    fun clearLoadedAt(streamId: StreamId): Sql

     /**
      * Implementation specific if there is no option to retry again with safe casted SQL or the
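`prepareTablesForSoftReset` is a default member on the interface: it composes two dialect-specific primitives (`createTable` and `clearLoadedAt`), so implementors get soft-reset preparation for free and only supply the per-dialect SQL. A rough sketch of that shape, with simplified signatures rather than the real API:

```kotlin
interface Generator {
    fun createTable(suffix: String, force: Boolean): String
    fun clearLoadedAt(): String

    // default implementation: concatenate the dialect-specific statements
    fun prepareTablesForSoftReset(): List<String> =
        listOf(createTable("_ab_soft_reset", force = true), clearLoadedAt())
}
```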
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt
index 4ad8c84932ab..60a8fb24fe75 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt
@@ -99,7 +99,7 @@ interface TyperDeduper {
      * [StreamSyncSummary.DEFAULT].
      */
     @Throws(Exception::class)
-    fun typeAndDedupe(streamSyncSummaries: Map)
+    fun typeAndDedupe(streamSyncSummaries: Map)

     @Throws(Exception::class)
     fun commitFinalTables()
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt
index 61d53864d036..4229facf0b95 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt
@@ -12,9 +12,9 @@ import java.util.List
 import org.junit.jupiter.api.Assertions
 import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.Test
-import org.mockito.ArgumentMatchers
 import org.mockito.Mockito
 import org.mockito.invocation.InvocationOnMock
+import org.mockito.kotlin.any

 internal class CatalogParserTest {
     private lateinit var sqlGenerator: SqlGenerator
@@ -24,31 +24,18 @@ internal class CatalogParserTest {
     fun setup() {
         sqlGenerator = Mockito.mock(SqlGenerator::class.java)
         // noop quoting logic
-        Mockito.`when`(sqlGenerator.buildColumnId(ArgumentMatchers.any())).thenAnswer {
-            invocation: InvocationOnMock ->
+        Mockito.`when`(sqlGenerator.buildColumnId(any())).thenAnswer { invocation: InvocationOnMock
+            ->
             val fieldName = invocation.getArgument(0)
             ColumnId(fieldName, fieldName, fieldName)
         }
-        Mockito.`when`(
-                sqlGenerator.buildStreamId(
-                    ArgumentMatchers.any(),
-                    ArgumentMatchers.any(),
-                    ArgumentMatchers.any()
-                )
-            )
-            .thenAnswer { invocation: InvocationOnMock ->
-                val namespace = invocation.getArgument(0)
-                val name = invocation.getArgument(1)
-                val rawNamespace = invocation.getArgument(1)
-                StreamId(
-                    namespace,
-                    name,
-                    rawNamespace,
-                    namespace + "_abab_" + name,
-                    namespace,
-                    name
-                )
-            }
+        Mockito.`when`(sqlGenerator.buildStreamId(any(), any(), any())).thenAnswer {
+            invocation: InvocationOnMock ->
+            val namespace = invocation.getArgument(0)
+            val name = invocation.getArgument(1)
+            val rawNamespace = invocation.getArgument(1)
+            StreamId(namespace, name, rawNamespace, namespace + "_abab_" + name, namespace, name)
+        }
         parser = CatalogParser(sqlGenerator)
     }

@@ -59,29 +46,23 @@ internal class CatalogParserTest {
      */
     @Test
     fun finalNameCollision() {
-        Mockito.`when`(
-                sqlGenerator!!.buildStreamId(
-                    ArgumentMatchers.any(),
-                    ArgumentMatchers.any(),
-                    ArgumentMatchers.any()
-                )
-            )
-            .thenAnswer { invocation: InvocationOnMock ->
-                val originalNamespace = invocation.getArgument(0)
-                val originalName = (invocation.getArgument(1))
-                val originalRawNamespace = (invocation.getArgument(1))
+        Mockito.`when`(sqlGenerator!!.buildStreamId(any(), any(), any())).thenAnswer {
+            invocation: InvocationOnMock ->
+            val originalNamespace = invocation.getArgument(0)
+            val originalName = (invocation.getArgument(1))
+            val originalRawNamespace = (invocation.getArgument(1))

-                // emulate quoting logic that causes a name collision
-                val quotedName = originalName.replace("bar".toRegex(), "")
-                StreamId(
-                    originalNamespace,
-                    quotedName,
-                    originalRawNamespace,
-                    originalNamespace + "_abab_" + quotedName,
-                    originalNamespace,
-                    originalName
-                )
-            }
+            // emulate quoting logic that causes a name collision
+            val quotedName = originalName.replace("bar".toRegex(), "")
+            StreamId(
+                originalNamespace,
+                quotedName,
+                originalRawNamespace,
+                originalNamespace + "_abab_" + quotedName,
+                originalNamespace,
+                originalName
+            )
+        }
         val catalog =
             ConfiguredAirbyteCatalog()
                 .withStreams(List.of(stream("a", "foobarfoo"), stream("a", "foofoo")))
@@ -100,13 +81,13 @@ internal class CatalogParserTest {
      */
     @Test
     fun columnNameCollision() {
-        Mockito.`when`(sqlGenerator!!.buildColumnId(ArgumentMatchers.any(), ArgumentMatchers.any()))
-            .thenAnswer { invocation: InvocationOnMock ->
-                val originalName = invocation.getArgument(0)
-                // emulate quoting logic that causes a name collision
-                val quotedName = originalName.replace("bar".toRegex(), "")
-                ColumnId(quotedName, originalName, quotedName)
-            }
+        Mockito.`when`(sqlGenerator!!.buildColumnId(any(), any())).thenAnswer {
+            invocation: InvocationOnMock ->
+            val originalName = invocation.getArgument(0)
+            // emulate quoting logic that causes a name collision
+            val quotedName = originalName.replace("bar".toRegex(), "")
+            ColumnId(quotedName, originalName, quotedName)
+        }
         val schema =
             Jsons.deserialize(
                 """
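The matcher swap repeated throughout these tests is the standard fix for stubbing non-null Kotlin parameters: `ArgumentMatchers.any()` returns `null`, which trips Kotlin's intrinsic null checks once a mocked signature becomes non-null, while `org.mockito.kotlin.any()` registers the same matcher but yields a usable non-null placeholder. A condensed sketch with a stand-in interface:

```kotlin
import org.mockito.Mockito
import org.mockito.kotlin.any

interface ColumnNamer {
    fun buildColumnId(name: String): String // non-null parameter
}

fun main() {
    val namer = Mockito.mock(ColumnNamer::class.java)
    // ArgumentMatchers.any() would hand `name` a null and fail the runtime
    // null check; mockito-kotlin's any() provides a non-null stand-in
    Mockito.`when`(namer.buildColumnId(any())).thenAnswer { it.getArgument<String>(0) }
    println(namer.buildColumnId("foo")) // prints: foo
}
```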
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt
index fbd5a43168ea..9d56a7e5d23b 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt
@@ -26,6 +26,7 @@ import org.junit.jupiter.api.Test
 import org.mockito.ArgumentMatchers
 import org.mockito.Mockito
 import org.mockito.Mockito.mock
+import org.mockito.kotlin.any

 class DefaultTyperDeduperTest {
     private var parsedCatalog: ParsedCatalog? = null
@@ -51,7 +52,7 @@ class DefaultTyperDeduperTest {
     private lateinit var destinationHandler: DestinationHandler

     private lateinit var initialStates: List>
-    private lateinit var updatedStates: MutableMap
+    private lateinit var updatedStates: MutableMap

     private lateinit var migrator: DestinationV1V2Migrator
     private lateinit var typerDeduper: TyperDeduper
@@ -206,7 +207,7 @@ class DefaultTyperDeduperTest {
         Mockito.clearInvocations(destinationHandler)

         typerDeduper!!.commitFinalTables()
-        Mockito.verify(destinationHandler, Mockito.never()).execute(ArgumentMatchers.any())
+        Mockito.verify(destinationHandler, Mockito.never()).execute(any())
     }

     /**
@@ -314,7 +315,7 @@ class DefaultTyperDeduperTest {
         Mockito.clearInvocations(destinationHandler)

         typerDeduper!!.prepareFinalTables()
-        Mockito.verify(destinationHandler, Mockito.never()).execute(ArgumentMatchers.any())
+        Mockito.verify(destinationHandler, Mockito.never()).execute(any())
     }

     /**
@@ -466,9 +467,7 @@ class DefaultTyperDeduperTest {
     @Test
     @Throws(Exception::class)
     fun failedSetup() {
-        Mockito.doThrow(RuntimeException("foo"))
-            .`when`(destinationHandler)
-            .execute(ArgumentMatchers.any())
+        Mockito.doThrow(RuntimeException("foo")).`when`(destinationHandler).execute(any())

         Assertions.assertThrows(Exception::class.java) { typerDeduper!!.prepareFinalTables() }
         Mockito.clearInvocations(destinationHandler)
@@ -636,7 +635,7 @@ class DefaultTyperDeduperTest {
                     MockState(true, true, true)
                 )
             )
-        Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any())
+        Mockito.verify(destinationHandler).gatherInitialState(any())
         Mockito.verify(destinationHandler)
             .execute(
                 separately(
@@ -756,7 +755,7 @@ class DefaultTyperDeduperTest {
                     MockState(true, true, true)
                 )
             )
-        Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any())
+        Mockito.verify(destinationHandler).gatherInitialState(any())
         Mockito.verify(destinationHandler)
             .execute(
                 separately(
@@ -865,7 +864,7 @@ class DefaultTyperDeduperTest {
                     MockState(true, false, false)
                 )
             )
-        Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any())
+        Mockito.verify(destinationHandler).gatherInitialState(any())
         Mockito.verify(destinationHandler)
             .execute(
                 separately(
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt
index 19a7dc6066d8..c427253bdf56 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt
@@ -15,9 +15,9 @@ import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.Arguments
 import org.junit.jupiter.params.provider.ArgumentsProvider
 import org.junit.jupiter.params.provider.ArgumentsSource
-import org.mockito.ArgumentMatchers
 import org.mockito.Mockito
 import org.mockito.Mockito.mock
+import org.mockito.kotlin.any
 import org.mockito.kotlin.spy

 class DestinationV1V2MigratorTest {
@@ -113,7 +113,7 @@ class DestinationV1V2MigratorTest {
         migrator.migrate(sqlGenerator, handler, stream)
         Mockito.verify(handler).execute(sql)
         // Exception thrown when executing sql, TableNotMigratedException thrown
-        Mockito.doThrow(Exception::class.java).`when`(handler).execute(ArgumentMatchers.any())
+        Mockito.doThrow(Exception::class.java).`when`(handler).execute(any())
         val exception =
             Assertions.assertThrows(TableNotMigratedException::class.java) {
                 migrator.migrate(sqlGenerator, handler, stream)
@@ -136,7 +136,7 @@ class DestinationV1V2MigratorTest {
             v1RawTableSchemaMatches: Boolean
         ): BaseDestinationV1V2Migrator<*> {
             val migrator: BaseDestinationV1V2Migrator = spy()
-            Mockito.`when`(migrator.doesAirbyteInternalNamespaceExist(ArgumentMatchers.any()))
+            Mockito.`when`(migrator.doesAirbyteInternalNamespaceExist(any()))
                 .thenReturn(v2NamespaceExists)

             val existingTable = if (v2TableExists) Optional.of("v2_raw") else Optional.empty()
@@ -156,7 +156,7 @@ class DestinationV1V2MigratorTest {
                 )
                 .thenReturn(v2RawSchemaMatches)

-            Mockito.`when`(migrator.convertToV1RawName(ArgumentMatchers.any()))
+            Mockito.`when`(migrator.convertToV1RawName(any()))
                 .thenReturn(NamespacedTableName("v1_raw_namespace", "v1_raw_table"))

             val existingV1RawTable = if (v1RawTableExists) Optional.of("v1_raw") else Optional.empty()
@@ -174,7 +174,13 @@ class DestinationV1V2MigratorTest {

         @Throws(Exception::class)
         fun noIssuesMigrator(): BaseDestinationV1V2Migrator<*> {
-            return makeMockMigrator(true, false, true, true, true)
+            return makeMockMigrator(
+                v2NamespaceExists = true,
+                v2TableExists = false,
+                v2RawSchemaMatches = true,
+                v1RawTableExists = true,
+                v1RawTableSchemaMatches = true
+            )
         }
     }
 }
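The `noIssuesMigrator()` rewrite shows the usual remedy for a call with several same-typed positional booleans: Kotlin named arguments make each flag self-documenting and mis-ordering obvious in review. The same idea in isolation, with an illustrative function rather than the real helper:

```kotlin
fun makeMigrator(
    v2NamespaceExists: Boolean,
    v2TableExists: Boolean,
    v2RawSchemaMatches: Boolean
): String = "migrator($v2NamespaceExists, $v2TableExists, $v2RawSchemaMatches)"

fun main() {
    // each flag is labeled at the call site; swapping two would jump out
    println(makeMigrator(v2NamespaceExists = true, v2TableExists = false, v2RawSchemaMatches = true))
}
```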
{ + override fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql { return of( "OVERWRITE TABLE " + stream!!.finalTableId("") + @@ -58,10 +58,10 @@ internal class MockSqlGenerator : SqlGenerator { } override fun migrateFromV1toV2( - streamId: StreamId?, + streamId: StreamId, namespace: String?, tableName: String? - ): Sql? { + ): Sql { return of( "MIGRATE TABLE " + java.lang.String.join(".", namespace, tableName) + @@ -70,7 +70,7 @@ internal class MockSqlGenerator : SqlGenerator { ) } - override fun prepareTablesForSoftReset(stream: StreamConfig): Sql? { + override fun prepareTablesForSoftReset(stream: StreamConfig): Sql { return of( "PREPARE " + java.lang.String.join(".", stream.id.originalNamespace, stream.id.originalName) + @@ -78,7 +78,7 @@ internal class MockSqlGenerator : SqlGenerator { ) } - override fun clearLoadedAt(streamId: StreamId?): Sql { + override fun clearLoadedAt(streamId: StreamId): Sql { throw RuntimeException() } } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt index 264d3456db1f..3e09bef3cf0e 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt @@ -259,7 +259,8 @@ abstract class BaseSqlGeneratorIntegrationTest { - val initialState = destinationHandler!!.gatherInitialState(java.util.List.of(streamConfig)) + val initialState = + destinationHandler!!.gatherInitialState(java.util.List.of(streamConfig!!)) Assertions.assertEquals( 1, initialState!!.size, @@ -276,7 +277,7 @@ abstract class BaseSqlGeneratorIntegrationTest = + val FINAL_TABLE_COLUMN_NAMES: List = listOf( "_airbyte_raw_id", "_airbyte_extracted_at",