diff --git a/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt index db71b08e8c75..41b9621eec19 100644 --- a/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/azure-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/azure/AzureBlobStorageStreamCopier.kt @@ -172,7 +172,7 @@ abstract class AzureBlobStorageStreamCopier( } @Throws(Exception::class) - override fun generateMergeStatement(destTableName: String?): String? { + override fun generateMergeStatement(destTableName: String?): String { LOGGER.info( "Preparing to merge tmp table {} to dest table: {}, schema: {}, in destination.", tmpTableName, diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt index 3cf2c234ce93..0094e8c169e1 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/Destination.kt @@ -34,7 +34,7 @@ interface Destination : Integration { fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? /** @@ -53,7 +53,7 @@ interface Destination : Integration { fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? { return ShimToSerializedAirbyteMessageConsumer( getConsumer(config, catalog, outputRecordCollector) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt index 6a04d04180da..b57a1047d427 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunner.kt @@ -46,12 +46,12 @@ class IntegrationRunner @VisibleForTesting internal constructor( cliParser: IntegrationCliParser, - outputRecordCollector: Consumer, + outputRecordCollector: Consumer, destination: Destination?, source: Source? ) { private val cliParser: IntegrationCliParser - private val outputRecordCollector: Consumer + private val outputRecordCollector: Consumer private val integration: Integration private val destination: Destination? private val source: Source? @@ -61,7 +61,7 @@ internal constructor( destination: Destination? ) : this( IntegrationCliParser(), - Consumer { message: AirbyteMessage? -> + Consumer { message: AirbyteMessage -> Destination.Companion.defaultOutputRecordCollector(message) }, destination, @@ -72,7 +72,7 @@ internal constructor( source: Source? ) : this( IntegrationCliParser(), - Consumer { message: AirbyteMessage? 
-> + Consumer { message: AirbyteMessage -> Destination.Companion.defaultOutputRecordCollector(message) }, null, @@ -99,7 +99,7 @@ internal constructor( @VisibleForTesting internal constructor( cliParser: IntegrationCliParser, - outputRecordCollector: Consumer, + outputRecordCollector: Consumer, destination: Destination?, source: Source?, jsonSchemaValidator: JsonSchemaValidator @@ -254,7 +254,7 @@ internal constructor( private fun produceMessages( messageIterator: AutoCloseableIterator, - recordCollector: Consumer + recordCollector: Consumer ) { messageIterator!!.airbyteStream.ifPresent { s: AirbyteStreamNameNamespacePair? -> LOGGER.debug("Producing messages for stream {}...", s) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt index 6bf42876047b..916da5f6e5a6 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/spec_modification/SpecModifyingDestination.kt @@ -31,7 +31,7 @@ abstract class SpecModifyingDestination(private val destination: Destination) : override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? { return destination.getConsumer(config, catalog, outputRecordCollector) } @@ -40,7 +40,7 @@ abstract class SpecModifyingDestination(private val destination: Destination) : override fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? { return destination.getSerializedMessageConsumer(config, catalog, outputRecordCollector) } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt index 999eba99baf8..aaed6e6fb1d1 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/base/ssh/SshWrappedDestination.kt @@ -94,7 +94,7 @@ class SshWrappedDestination : Destination { override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? { val tunnel = getTunnelInstance(config) @@ -120,7 +120,7 @@ class SshWrappedDestination : Destination { override fun getSerializedMessageConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): SerializedAirbyteMessageConsumer? 
{ val clone = Jsons.clone(config) val connectionOptionsConfig: Optional = diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt index 49ed8224f5b1..65dc82bd7d14 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/function/DestinationFlushFunction.kt @@ -38,7 +38,7 @@ interface DestinationFlushFunction { @Throws(Exception::class) fun flush( decs: StreamDescriptor, - stream: Stream, + stream: Stream, ) /** diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt index 75d8fffea82f..579eb0506242 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.kt @@ -60,7 +60,7 @@ import org.slf4j.LoggerFactory class BufferedStreamConsumer @VisibleForTesting internal constructor( - private val outputRecordCollector: Consumer?, + private val outputRecordCollector: Consumer, private val onStart: OnStartFunction, private val bufferingStrategy: BufferingStrategy, private val onClose: OnCloseFunction, @@ -87,7 +87,7 @@ internal constructor( */ @Deprecated("") constructor( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, onStart: OnStartFunction, bufferingStrategy: BufferingStrategy, onClose: OnCloseFunction, @@ -109,7 +109,7 @@ internal constructor( ) constructor( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, onStart: OnStartFunction, bufferingStrategy: BufferingStrategy, onClose: OnCloseFunction, diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt index d05bbfbdbd52..b0bf8a570082 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/RecordWriter.kt @@ -6,7 +6,8 @@ package io.airbyte.cdk.integrations.destination.buffered_stream_consumer import io.airbyte.commons.functional.CheckedBiConsumer import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair -interface RecordWriter : CheckedBiConsumer, Exception> { +fun interface RecordWriter : + CheckedBiConsumer, Exception> { @Throws(Exception::class) override fun accept(stream: AirbyteStreamNameNamespacePair, records: List) } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt index 8ce7912fd8cc..da28224adf18 
100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperations.kt @@ -52,7 +52,7 @@ interface SqlOperations { * @throws Exception exception */ @Throws(Exception::class) - fun createTableIfNotExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) + fun createTableIfNotExists(database: JdbcDatabase, schemaName: String?, tableName: String?) /** * Query to create a table with provided name in provided schema if it does not already exist. @@ -72,7 +72,7 @@ interface SqlOperations { * @throws Exception exception */ @Throws(Exception::class) - fun dropTableIfExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) + fun dropTableIfExists(database: JdbcDatabase, schemaName: String?, tableName: String?) /** * Query to remove all records from a table. Assumes the table exists. @@ -82,11 +82,7 @@ interface SqlOperations { * @param tableName Name of table * @return Query */ - fun truncateTableQuery( - database: JdbcDatabase?, - schemaName: String?, - tableName: String? - ): String? + fun truncateTableQuery(database: JdbcDatabase?, schemaName: String?, tableName: String?): String /** * Insert records into table. Assumes the table exists. @@ -99,8 +95,8 @@ interface SqlOperations { */ @Throws(Exception::class) fun insertRecords( - database: JdbcDatabase?, - records: List?, + database: JdbcDatabase, + records: List, schemaName: String?, tableName: String? ) @@ -131,8 +127,7 @@ interface SqlOperations { * @param queries Queries to execute * @throws Exception exception */ - @Throws(Exception::class) - fun executeTransaction(database: JdbcDatabase?, queries: List?) + @Throws(Exception::class) fun executeTransaction(database: JdbcDatabase, queries: List) /** Check if the data record is valid and ok to be written to destination */ fun isValidData(data: JsonNode?): Boolean diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt index 76ae3262702f..bb8530eb46b7 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/StreamCopier.kt @@ -48,7 +48,7 @@ interface StreamCopier { @Throws(Exception::class) fun createDestinationTable(): String? /** Generates a merge SQL statement from the temporary table to the final table. */ - @Throws(Exception::class) fun generateMergeStatement(destTableName: String?): String? 
+ @Throws(Exception::class) fun generateMergeStatement(destTableName: String?): String /** * Cleans up the copier by removing the staging file and dropping the temporary table after diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt index 80570af6b760..635187a732b6 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.kt @@ -21,12 +21,12 @@ import org.slf4j.LoggerFactory * This should be deprecated as we slowly move towards using [SerializedBufferingStrategy] instead. */ class InMemoryRecordBufferingStrategy( - private val recordWriter: RecordWriter, + private val recordWriter: RecordWriter, private val checkAndRemoveRecordWriter: CheckAndRemoveRecordWriter?, private val maxQueueSizeInBytes: Long ) : BufferingStrategy { private var streamBuffer: - MutableMap> = + MutableMap> = HashMap() private var fileName: String? = null @@ -34,7 +34,7 @@ class InMemoryRecordBufferingStrategy( private var bufferSizeInBytes: Long = 0 constructor( - recordWriter: RecordWriter, + recordWriter: RecordWriter, maxQueueSizeInBytes: Long ) : this(recordWriter, null, maxQueueSizeInBytes) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index c7be3358f550..f2aa5c1c94bf 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.28.11 +version=0.28.12 diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt index bbb453b4fff5..09b011583d90 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/base/IntegrationRunnerTest.kt @@ -39,7 +39,7 @@ import org.slf4j.LoggerFactory internal class IntegrationRunnerTest { private lateinit var cliParser: IntegrationCliParser - private lateinit var stdoutConsumer: Consumer + private lateinit var stdoutConsumer: Consumer private lateinit var destination: Destination private lateinit var source: Source private lateinit var configPath: Path diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt index a515bdd90f12..3a1d1fe31847 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt @@ -553,7 +553,7 @@ class AsyncStreamConsumerTest { namespace: String, allRecords: List, ) { - val argumentCaptor = org.mockito.kotlin.argumentCaptor>() + val argumentCaptor = 
org.mockito.kotlin.argumentCaptor>() Mockito.verify(flushFunction, Mockito.atLeast(1)) .flush( org.mockito.kotlin.eq( diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt index c53d9deb3511..46ec065ebcad 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.kt @@ -31,10 +31,10 @@ import org.mockito.kotlin.mock class BufferedStreamConsumerTest { private lateinit var consumer: BufferedStreamConsumer private lateinit var onStart: OnStartFunction - private lateinit var recordWriter: RecordWriter + private lateinit var recordWriter: RecordWriter private lateinit var onClose: OnCloseFunction private lateinit var isValidRecord: CheckedFunction - private lateinit var outputRecordCollector: Consumer + private lateinit var outputRecordCollector: Consumer @BeforeEach @Throws(Exception::class) diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt index 9474afe20d7a..90584e05b129 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.kt @@ -16,7 +16,7 @@ import org.mockito.Mockito import org.mockito.kotlin.mock class InMemoryRecordBufferingStrategyTest { - private val recordWriter: RecordWriter = mock() + private val recordWriter: RecordWriter = mock() @Test @Throws(Exception::class) diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle b/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle index 3fc95410b3b9..a42598118edf 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle @@ -8,6 +8,10 @@ java { } } +compileKotlin.compilerOptions.allWarningsAsErrors = false +compileTestFixturesKotlin.compilerOptions.allWarningsAsErrors = false +compileTestKotlin.compilerOptions.allWarningsAsErrors = false + dependencies { api 'org.apache.commons:commons-csv:1.10.0' @@ -27,4 +31,6 @@ dependencies { testFixturesImplementation testFixtures(project(':airbyte-cdk:java:airbyte-cdk:typing-deduping')) testImplementation project(':airbyte-cdk:java:airbyte-cdk:typing-deduping') + testImplementation 'org.mockito.kotlin:mockito-kotlin:5.2.1' + } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java 
b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt index c1751bfa3c0f..b9c21dec012a 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.kt @@ -15,6 +15,7 @@ import io.airbyte.cdk.integrations.JdbcConnector import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility.emitConfigErrorTrace import io.airbyte.cdk.integrations.base.Destination +import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.getRawNamespaceOverride import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2 @@ -37,20 +38,21 @@ import io.airbyte.integrations.base.destination.typing_deduping.migrators.Minimu import io.airbyte.protocol.models.v0.AirbyteConnectionStatus import io.airbyte.protocol.models.v0.AirbyteMessage import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog -import org.apache.commons.lang3.NotImplementedException -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.sql.Connection import java.sql.ResultSet import java.sql.SQLException import java.util.* import java.util.function.Consumer import javax.sql.DataSource +import org.apache.commons.lang3.NotImplementedException +import org.slf4j.Logger +import org.slf4j.LoggerFactory -abstract class AbstractJdbcDestination -(driverClass: String?, - protected val namingResolver: NamingConventionTransformer, - protected val sqlOperations: SqlOperations) : JdbcConnector(driverClass!!), Destination { +abstract class AbstractJdbcDestination( + driverClass: String, + protected val namingResolver: NamingConventionTransformer, + protected val sqlOperations: SqlOperations +) : JdbcConnector(driverClass), Destination { protected val configSchemaKey: String get() = "schema" @@ -62,8 +64,11 @@ abstract class AbstractJdbcDestination { - val customProperties = parseJdbcParameters(config!!, JdbcUtils.JDBC_URL_PARAMS_KEY) + protected open fun getConnectionProperties(config: JsonNode): Map { + val customProperties = parseJdbcParameters(config, JdbcUtils.JDBC_URL_PARAMS_KEY) val defaultProperties = getDefaultConnectionProperties(config) assertCustomParametersDontOverwriteDefaultParameters(customProperties, defaultProperties) return MoreMaps.merge(customProperties, defaultProperties) } - private fun assertCustomParametersDontOverwriteDefaultParameters(customParameters: Map, - defaultParameters: Map) { + private fun assertCustomParametersDontOverwriteDefaultParameters( + customParameters: Map, + defaultParameters: Map + ) { for (key in defaultParameters.keys) { - require(!(customParameters.containsKey(key) && customParameters[key] != defaultParameters[key])) { "Cannot overwrite default JDBC parameter $key" } + require( + !(customParameters.containsKey(key) && + customParameters[key] != 
defaultParameters[key]) + ) { "Cannot overwrite default JDBC parameter $key" } } } - protected abstract fun getDefaultConnectionProperties(config: JsonNode?): Map + protected abstract fun getDefaultConnectionProperties(config: JsonNode): Map - abstract fun toJdbcConfig(config: JsonNode?): JsonNode + abstract fun toJdbcConfig(config: JsonNode): JsonNode protected abstract val sqlGenerator: JdbcSqlGenerator - get - protected abstract fun getDestinationHandler(databaseName: String?, - database: JdbcDatabase?, - rawTableSchema: String?): JdbcDestinationHandler + protected abstract fun getDestinationHandler( + databaseName: String, + database: JdbcDatabase, + rawTableSchema: String + ): JdbcDestinationHandler /** - * Provide any migrations that the destination needs to run. Most destinations will need to provide - * an instande of + * Provide any migrations that the destination needs to run. Most destinations will need to + * provide an instance of * [io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcV1V2Migrator] at minimum. */ protected abstract fun getMigrations( - database: JdbcDatabase?, - databaseName: String?, - sqlGenerator: SqlGenerator?, - destinationHandler: DestinationHandler?): List> + database: JdbcDatabase, + databaseName: String, + sqlGenerator: SqlGenerator, + destinationHandler: DestinationHandler + ): List> /** * "database" key at root of the config json, for any other variants in config, override this @@ -176,75 +195,112 @@ abstract class AbstractJdbcDestination?): AirbyteMessageConsumer? { + override fun getConsumer( + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + outputRecordCollector: Consumer + ): AirbyteMessageConsumer? { throw NotImplementedException("Should use the getSerializedMessageConsumer instead") } @Throws(Exception::class) - override fun getSerializedMessageConsumer(config: JsonNode, - catalog: ConfiguredAirbyteCatalog?, - outputRecordCollector: Consumer?): SerializedAirbyteMessageConsumer? { + override fun getSerializedMessageConsumer( + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + outputRecordCollector: Consumer + ): SerializedAirbyteMessageConsumer? { val database = getDatabase(getDataSource(config)) // Short circuit for non-v2 destinations. if (!isDestinationV2) { - return createAsync( - outputRecordCollector, - database, - sqlOperations, - namingResolver, - config, - catalog, - null, - NoopTyperDeduper()) + return JdbcBufferedConsumerFactory.createAsync( + outputRecordCollector, + database, + sqlOperations, + namingResolver, + config, + catalog, + null, + NoopTyperDeduper() + ) } val defaultNamespace = config[configSchemaKey].asText() addDefaultNamespaceToStreams(catalog!!, defaultNamespace) - return getV2MessageConsumer(config, catalog, outputRecordCollector, database, defaultNamespace) + return getV2MessageConsumer( + config, + catalog, + outputRecordCollector, + database, + defaultNamespace + ) } - private fun getV2MessageConsumer(config: JsonNode, - catalog: ConfiguredAirbyteCatalog?, - outputRecordCollector: Consumer?, - database: JdbcDatabase, - defaultNamespace: String): SerializedAirbyteMessageConsumer? { + private fun getV2MessageConsumer( + config: JsonNode, + catalog: ConfiguredAirbyteCatalog?, + outputRecordCollector: Consumer, + database: JdbcDatabase, + defaultNamespace: String + ): SerializedAirbyteMessageConsumer? { val sqlGenerator = sqlGenerator val rawNamespaceOverride = getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE) - val parsedCatalog = rawNamespaceOverride - .map { override: String?
-> CatalogParser(sqlGenerator, override!!) } + val parsedCatalog = + rawNamespaceOverride + .map { override: String -> CatalogParser(sqlGenerator, override) } .orElse(CatalogParser(sqlGenerator)) .parseCatalog(catalog!!) val databaseName = getDatabaseName(config) val migrator = JdbcV1V2Migrator(namingResolver, database, databaseName) val v2TableMigrator = NoopV2TableMigrator() val destinationHandler: DestinationHandler = - getDestinationHandler(databaseName, database, rawNamespaceOverride.orElse(DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)) - val disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config[DISABLE_TYPE_DEDUPE].asBoolean(false) + getDestinationHandler( + databaseName, + database, + rawNamespaceOverride.orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE) + ) + val disableTypeDedupe = + config.has(DISABLE_TYPE_DEDUPE) && config[DISABLE_TYPE_DEDUPE].asBoolean(false) val typerDeduper: TyperDeduper val migrations = getMigrations(database, databaseName, sqlGenerator, destinationHandler) - typerDeduper = if (disableTypeDedupe) { - NoOpTyperDeduperWithV1V2Migrations(sqlGenerator, destinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations) - } else { - DefaultTyperDeduper(sqlGenerator, destinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations) - } + typerDeduper = + if (disableTypeDedupe) { + NoOpTyperDeduperWithV1V2Migrations( + sqlGenerator, + destinationHandler, + parsedCatalog, + migrator, + v2TableMigrator, + migrations + ) + } else { + DefaultTyperDeduper( + sqlGenerator, + destinationHandler, + parsedCatalog, + migrator, + v2TableMigrator, + migrations + ) + } return JdbcBufferedConsumerFactory.createAsync( - outputRecordCollector, - database, - sqlOperations, - namingResolver, - config, - catalog, - defaultNamespace, - typerDeduper, - getDataTransformer(parsedCatalog, defaultNamespace)) + outputRecordCollector, + database, + sqlOperations, + namingResolver, + config, + catalog, + defaultNamespace, + typerDeduper, + getDataTransformer(parsedCatalog, defaultNamespace) + ) } companion object { @@ -255,51 +311,77 @@ abstract class AbstractJdbcDestination conn.metaData.catalogs }, { queryContext: ResultSet? -> JdbcUtils.defaultSourceOperations.rowToJson(queryContext!!) }) + database.bufferedResultSetQuery( + { conn: Connection -> conn.metaData.catalogs }, + { queryContext: ResultSet? -> + JdbcUtils.defaultSourceOperations.rowToJson(queryContext!!) 
+ } + ) - // verify we have write permissions on the target schema by creating a table with a random name, + // verify we have write permissions on the target schema by creating a table with a + // random name, // then dropping that table - val outputTableName = namingResolver.getIdentifier("_airbyte_connection_test_" + UUID.randomUUID().toString().replace("-".toRegex(), "")) + val outputTableName = + namingResolver.getIdentifier( + "_airbyte_connection_test_" + + UUID.randomUUID().toString().replace("-".toRegex(), "") + ) sqlOps.createSchemaIfNotExists(database, outputSchema) sqlOps.createTableIfNotExists(database, outputSchema, outputTableName) // verify if user has permission to make SQL INSERT queries try { if (attemptInsert) { - sqlOps.insertRecords(database, java.util.List.of(dummyRecord), outputSchema, outputTableName) + sqlOps.insertRecords( + database, + java.util.List.of(dummyRecord), + outputSchema, + outputTableName + ) } } finally { sqlOps.dropTableIfExists(database, outputSchema, outputTableName) @@ -320,15 +402,18 @@ abstract class AbstractJdbcDestination, - private val valueNodeAdapter: Function) { + * @param filterValueNode + * - filter condition which decides which value nodes should be adapted + * @param valueNodeAdapter + * - transformation function which returns the adapted value node + */ +( + private val filterValueNode: Predicate, + private val valueNodeAdapter: Function +) { fun adapt(messageData: JsonNode?) { if (messageData != null) { adaptAllValueNodes(messageData) @@ -29,9 +34,9 @@ } /** - * The method inspects json node. In case, it's a value node we check the node by CheckFunction and - * apply ValueNodeAdapter. Filtered nodes will be updated by adapted version. If element is an array - * or an object, this we run the method recursively for them. + * The method inspects a json node. If it is a value node, we check it with the filter + * predicate and apply the value node adapter. Filtered nodes are replaced by their adapted + * versions. If the element is an array or an object, we run the method recursively on its + * children. * * @param fieldName Name of a json node * @param node Json node @@ -44,9 +49,13 @@ (parentNode as ObjectNode?)!!.set(fieldName, adaptedNode) } else throw RuntimeException("Unexpected value node without fieldName.
Node: $node") } else if (node.isArray) { - node.elements().forEachRemaining { arrayNode: JsonNode -> adaptValueNodes(null, arrayNode, node) } + node.elements().forEachRemaining { arrayNode: JsonNode -> + adaptValueNodes(null, arrayNode, node) + } } else { - node.fields().forEachRemaining { stringJsonNodeEntry: Map.Entry -> adaptValueNodes(stringJsonNodeEntry.key, stringJsonNodeEntry.value, node) } + node.fields().forEachRemaining { stringJsonNodeEntry: Map.Entry -> + adaptValueNodes(stringJsonNodeEntry.key, stringJsonNodeEntry.value, node) + } } } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt index fb07aeb3ad5e..a13c2f3dba43 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt @@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.JsonNode import com.google.common.base.Preconditions import io.airbyte.cdk.db.jdbc.JdbcDatabase import io.airbyte.cdk.db.jdbc.JdbcUtils +import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.getRawNamespaceOverride import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2 @@ -26,29 +27,25 @@ import io.airbyte.commons.json.Jsons import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper import io.airbyte.protocol.models.v0.* -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.util.* import java.util.concurrent.Executors import java.util.function.Consumer import java.util.function.Function import java.util.stream.Collectors +import org.slf4j.Logger +import org.slf4j.LoggerFactory /** * Strategy: * - * * 1. Create a final table for each stream * - * * 2. Accumulate records in a buffer. One buffer per stream * - * * 3. As records accumulate write them in batch to the database. We set a minimum numbers of records * before writing to avoid wasteful record-wise writes. In the case with slow syncs this will be * superseded with a periodic record flush from [BufferedStreamConsumer.periodicBufferFlush] * - * * 4. Once all records have been written to buffer, flush the buffer and write any remaining records * to the database (regardless of how few are left) */ @@ -56,50 +53,70 @@ object JdbcBufferedConsumerFactory { private val LOGGER: Logger = LoggerFactory.getLogger(JdbcBufferedConsumerFactory::class.java) @JvmOverloads - fun createAsync(outputRecordCollector: Consumer?, - database: JdbcDatabase, - sqlOperations: SqlOperations, - namingResolver: NamingConventionTransformer, - config: JsonNode, - catalog: ConfiguredAirbyteCatalog?, - defaultNamespace: String?, - typerDeduper: TyperDeduper, - dataTransformer: StreamAwareDataTransformer? 
= IdentityDataTransformer()): SerializedAirbyteMessageConsumer { - val writeConfigs = createWriteConfigs(namingResolver, config, catalog, sqlOperations.isSchemaRequired) + fun createAsync( + outputRecordCollector: Consumer, + database: JdbcDatabase, + sqlOperations: SqlOperations, + namingResolver: NamingConventionTransformer, + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + defaultNamespace: String?, + typerDeduper: TyperDeduper, + dataTransformer: StreamAwareDataTransformer = IdentityDataTransformer() + ): SerializedAirbyteMessageConsumer { + val writeConfigs = + createWriteConfigs(namingResolver, config, catalog, sqlOperations.isSchemaRequired) return AsyncStreamConsumer( - outputRecordCollector, - onStartFunction(database, sqlOperations, writeConfigs, typerDeduper), - onCloseFunction(typerDeduper), - JdbcInsertFlushFunction(recordWriterFunction(database, sqlOperations, writeConfigs, catalog)), - catalog!!, - BufferManager((Runtime.getRuntime().maxMemory() * 0.2).toLong()), - FlushFailure(), - Optional.ofNullable(defaultNamespace), - Executors.newFixedThreadPool(2), - dataTransformer!!, - DeserializationUtil()) + outputRecordCollector, + onStartFunction(database, sqlOperations, writeConfigs, typerDeduper), + onCloseFunction(typerDeduper), + JdbcInsertFlushFunction( + recordWriterFunction(database, sqlOperations, writeConfigs, catalog) + ), + catalog, + BufferManager((Runtime.getRuntime().maxMemory() * 0.2).toLong()), + FlushFailure(), + Optional.ofNullable(defaultNamespace), + Executors.newFixedThreadPool(2), + dataTransformer, + DeserializationUtil() + ) } - private fun createWriteConfigs(namingResolver: NamingConventionTransformer, - config: JsonNode, - catalog: ConfiguredAirbyteCatalog?, - schemaRequired: Boolean): List { + private fun createWriteConfigs( + namingResolver: NamingConventionTransformer, + config: JsonNode, + catalog: ConfiguredAirbyteCatalog?, + schemaRequired: Boolean + ): List { if (schemaRequired) { - Preconditions.checkState(config.has("schema"), "jdbc destinations must specify a schema.") + Preconditions.checkState( + config.has("schema"), + "jdbc destinations must specify a schema." + ) } - return catalog!!.streams.stream().map(toWriteConfig(namingResolver, config, schemaRequired)).collect(Collectors.toList()) + return catalog!! 
+ .streams + .stream() + .map(toWriteConfig(namingResolver, config, schemaRequired)) + .collect(Collectors.toList()) } private fun toWriteConfig( - namingResolver: NamingConventionTransformer, - config: JsonNode, - schemaRequired: Boolean): Function { + namingResolver: NamingConventionTransformer, + config: JsonNode, + schemaRequired: Boolean + ): Function { return Function { stream: ConfiguredAirbyteStream -> - Preconditions.checkNotNull(stream.destinationSyncMode, "Undefined destination sync mode") + Preconditions.checkNotNull( + stream.destinationSyncMode, + "Undefined destination sync mode" + ) val abStream = stream.stream - val defaultSchemaName = if (schemaRequired) namingResolver.getIdentifier(config["schema"].asText()) - else namingResolver.getIdentifier(config[JdbcUtils.DATABASE_KEY].asText()) + val defaultSchemaName = + if (schemaRequired) namingResolver.getIdentifier(config["schema"].asText()) + else namingResolver.getIdentifier(config[JdbcUtils.DATABASE_KEY].asText()) // Method checks for v2 val outputSchema = getOutputSchema(abStream, defaultSchemaName, namingResolver) val streamName = abStream.name @@ -117,7 +134,15 @@ object JdbcBufferedConsumerFactory { } val syncMode = stream.destinationSyncMode - val writeConfig = WriteConfig(streamName, abStream.namespace, outputSchema, tmpTableName, tableName, syncMode) + val writeConfig = + WriteConfig( + streamName, + abStream.namespace, + outputSchema, + tmpTableName, + tableName, + syncMode + ) LOGGER.info("Write config: {}", writeConfig) writeConfig } @@ -127,58 +152,74 @@ object JdbcBufferedConsumerFactory { * Defer to the [AirbyteStream]'s namespace. If this is not set, use the destination's default * schema. This namespace is source-provided, and can be potentially empty. * - * - * The logic here matches the logic in the catalog_process.py for Normalization. Any modifications - * need to be reflected there and vice versa. + * The logic here matches the logic in the catalog_process.py for Normalization. Any + * modifications need to be reflected there and vice versa. */ - private fun getOutputSchema(stream: AirbyteStream, - defaultDestSchema: String, - namingResolver: NamingConventionTransformer): String { + private fun getOutputSchema( + stream: AirbyteStream, + defaultDestSchema: String, + namingResolver: NamingConventionTransformer + ): String { return if (isDestinationV2) { - namingResolver - .getNamespace(getRawNamespaceOverride(AbstractJdbcDestination.Companion.RAW_SCHEMA_OVERRIDE).orElse(DEFAULT_AIRBYTE_INTERNAL_NAMESPACE)) + namingResolver.getNamespace( + getRawNamespaceOverride(AbstractJdbcDestination.Companion.RAW_SCHEMA_OVERRIDE) + .orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE) + ) } else { - namingResolver.getNamespace(Optional.ofNullable(stream.namespace).orElse(defaultDestSchema)) + namingResolver.getNamespace( + Optional.ofNullable(stream.namespace).orElse(defaultDestSchema) + ) } } /** * Sets up destination storage through: * - * * 1. Creates Schema (if not exists) * - * * 2. Creates airybte_raw table (if not exists) * - * * 3. 
Truncates table if sync mode is in OVERWRITE * * @param database JDBC database to connect to * @param sqlOperations interface for execution SQL queries - * @param writeConfigs settings for each stream - */ - private fun onStartFunction(database: JdbcDatabase, - sqlOperations: SqlOperations, - writeConfigs: Collection, - typerDeduper: TyperDeduper): OnStartFunction { + * @param writeConfigs settings for each stream + */ + private fun onStartFunction( + database: JdbcDatabase, + sqlOperations: SqlOperations, + writeConfigs: Collection, + typerDeduper: TyperDeduper + ): OnStartFunction { return OnStartFunction { typerDeduper.prepareSchemasAndRunMigrations() - LOGGER.info("Preparing raw tables in destination started for {} streams", writeConfigs.size) - val queryList: MutableList = ArrayList() + LOGGER.info( + "Preparing raw tables in destination started for {} streams", + writeConfigs.size + ) + val queryList: MutableList = ArrayList() for (writeConfig in writeConfigs) { val schemaName = writeConfig.outputSchemaName val dstTableName = writeConfig.outputTableName - LOGGER.info("Preparing raw table in destination started for stream {}. schema: {}, table name: {}", - writeConfig.streamName, - schemaName, - dstTableName) + LOGGER.info( + "Preparing raw table in destination started for stream {}. schema: {}, table name: {}", + writeConfig.streamName, + schemaName, + dstTableName + ) sqlOperations.createSchemaIfNotExists(database, schemaName) sqlOperations.createTableIfNotExists(database, schemaName, dstTableName) when (writeConfig.syncMode) { - DestinationSyncMode.OVERWRITE -> queryList.add(sqlOperations.truncateTableQuery(database, schemaName, dstTableName)) - DestinationSyncMode.APPEND, DestinationSyncMode.APPEND_DEDUP -> {} - else -> throw IllegalStateException("Unrecognized sync mode: " + writeConfig.syncMode) + DestinationSyncMode.OVERWRITE -> + queryList.add( + sqlOperations.truncateTableQuery(database, schemaName, dstTableName) + ) + DestinationSyncMode.APPEND, + DestinationSyncMode.APPEND_DEDUP -> {} + else -> + throw IllegalStateException( + "Unrecognized sync mode: " + writeConfig.syncMode + ) } } sqlOperations.executeTransaction(database, queryList) @@ -195,25 +236,39 @@ object JdbcBufferedConsumerFactory { * @param writeConfigs settings for each stream * @param catalog catalog of all streams to sync */ - private fun recordWriterFunction(database: JdbcDatabase, - sqlOperations: SqlOperations, - writeConfigs: List, - catalog: ConfiguredAirbyteCatalog?): RecordWriter { - val pairToWriteConfig = writeConfigs.stream() - .collect(Collectors.toUnmodifiableMap(Function { obj: WriteConfig? -> toNameNamespacePair() }, Function.identity())) + private fun recordWriterFunction( + database: JdbcDatabase, + sqlOperations: SqlOperations, + writeConfigs: List, + catalog: ConfiguredAirbyteCatalog? + ): RecordWriter { + val pairToWriteConfig: Map = + writeConfigs.associateBy { toNameNamespacePair(it) } - return RecordWriter { pair: AirbyteStreamNameNamespacePair, records: List? -> - require(pairToWriteConfig.containsKey(pair)) { String.format("Message contained record from a stream that was not in the catalog. 
\ncatalog: %s", Jsons.serialize(catalog)) } - val writeConfig = pairToWriteConfig[pair] - sqlOperations.insertRecords(database, ArrayList(records), writeConfig.getOutputSchemaName(), writeConfig.getOutputTableName()) + return RecordWriter { + pair: AirbyteStreamNameNamespacePair, + records: List -> + require(pairToWriteConfig.containsKey(pair)) { + String.format( + "Message contained record from a stream that was not in the catalog. \ncatalog: %s", + Jsons.serialize(catalog) + ) + } + val writeConfig = pairToWriteConfig.getValue(pair) + sqlOperations.insertRecords( + database, + ArrayList(records), + writeConfig.outputSchemaName, + writeConfig.outputTableName + ) } } - /** - * Tear down functionality - */ + /** Tear down functionality */ private fun onCloseFunction(typerDeduper: TyperDeduper): OnCloseFunction { - return OnCloseFunction { hasFailed: Boolean?, streamSyncSummaries: Map? -> + return OnCloseFunction { + hasFailed: Boolean, + streamSyncSummaries: Map -> try { typerDeduper.typeAndDedupe(streamSyncSummaries) typerDeduper.commitFinalTables() diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt index 551187753ff1..613856d191cb 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcInsertFlushFunction.kt @@ -11,16 +11,20 @@ import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair import io.airbyte.protocol.models.v0.StreamDescriptor import java.util.stream.Stream -class JdbcInsertFlushFunction(private val recordWriter: RecordWriter) : DestinationFlushFunction { +class JdbcInsertFlushFunction(private val recordWriter: RecordWriter) : + DestinationFlushFunction { @Throws(Exception::class) - override fun flush(desc: StreamDescriptor, stream: Stream) { + override fun flush(desc: StreamDescriptor, stream: Stream) { recordWriter.accept( - AirbyteStreamNameNamespacePair(desc.name, desc.namespace), - stream.toList()) + AirbyteStreamNameNamespacePair(desc.name, desc.namespace), + stream.toList() + ) } override val optimalBatchSizeBytes: Long - get() =// TODO tune this value - currently SqlOperationUtils partitions 10K records per insert statement, - // but we'd like to stop doing that and instead control sql insert statement size via batch size. + get() = // TODO tune this value - currently SqlOperationsUtils partitions 10K records per + // insert statement, + // but we'd like to stop doing that and instead control sql insert statement size via + // batch size.
GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES.toLong() } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt index c8226d4a9240..e1191f9b45a9 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.kt @@ -10,8 +10,6 @@ import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2 import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage import io.airbyte.commons.exceptions.ConfigErrorException import io.airbyte.commons.json.Jsons -import org.apache.commons.csv.CSVFormat -import org.apache.commons.csv.CSVPrinter import java.io.File import java.io.PrintWriter import java.nio.charset.StandardCharsets @@ -20,6 +18,8 @@ import java.sql.Timestamp import java.time.Instant import java.util.* import java.util.function.Consumer +import org.apache.commons.csv.CSVFormat +import org.apache.commons.csv.CSVPrinter abstract class JdbcSqlOperations : SqlOperations { // this adapter modifies record message before inserting them to the destination @@ -48,8 +48,8 @@ abstract class JdbcSqlOperations : SqlOperations { /** * When an exception occurs, we may recognize it as an issue with the users permissions or other - * configuration options. In these cases, we can wrap the exception in a - * [ConfigErrorException] which will exclude the error from our on-call paging/reporting + * configuration options. In these cases, we can wrap the exception in a [ConfigErrorException] + * which will exclude the error from our on-call paging/reporting * * @param e the exception to check. * @return A ConfigErrorException with a message with actionable feedback to the user. @@ -59,9 +59,13 @@ abstract class JdbcSqlOperations : SqlOperations { } @Throws(SQLException::class) - override fun createTableIfNotExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) { + override fun createTableIfNotExists( + database: JdbcDatabase, + schemaName: String?, + tableName: String? + ) { try { - database!!.execute(createTableQuery(database, schemaName, tableName)) + database.execute(createTableQuery(database, schemaName, tableName)) for (postCreateSql in postCreateTableQueries(schemaName, tableName)) { database.execute(postCreateSql) } @@ -70,7 +74,11 @@ abstract class JdbcSqlOperations : SqlOperations { } } - override fun createTableQuery(database: JdbcDatabase?, schemaName: String?, tableName: String?): String? { + override fun createTableQuery( + database: JdbcDatabase?, + schemaName: String?, + tableName: String? + ): String? { return if (isDestinationV2) { createTableQueryV2(schemaName, tableName) } else { @@ -79,9 +87,9 @@ abstract class JdbcSqlOperations : SqlOperations { } /** - * Some subclasses may want to execute additional SQL statements after creating the raw table. For - * example, Postgres does not support index definitions within a CREATE TABLE statement, so we need - * to run CREATE INDEX statements after creating the table. + * Some subclasses may want to execute additional SQL statements after creating the raw table. 
+ * For example, Postgres does not support index definitions within a CREATE TABLE statement, so + * we need to run CREATE INDEX statements after creating the table. */ protected fun postCreateTableQueries(schemaName: String?, tableName: String?): List { return listOf() @@ -89,7 +97,7 @@ protected fun createTableQueryV1(schemaName: String?, tableName: String?): String { return String.format( - """ + """ CREATE TABLE IF NOT EXISTS %s.%s ( %s VARCHAR PRIMARY KEY, %s JSONB, ); """.trimIndent(), - schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT) + schemaName, + tableName, + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_DATA, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + ) } protected fun createTableQueryV2(schemaName: String?, tableName: String?): String { - // Note that Meta is the last column in order, there was a time when tables didn't have meta, + // Note that Meta is the last column in order; there was a time when tables didn't have + // meta, // we issued Alter to add that column so it should be the last column. return String.format( - """ + """ CREATE TABLE IF NOT EXISTS %s.%s ( %s VARCHAR PRIMARY KEY, %s JSONB, %s ); """.trimIndent(), - schemaName, - tableName, - JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, - JavaBaseConstants.COLUMN_NAME_DATA, - JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, - JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT, - JavaBaseConstants.COLUMN_NAME_AB_META) + schemaName, + tableName, + JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, + JavaBaseConstants.COLUMN_NAME_DATA, + JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, + JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT, + JavaBaseConstants.COLUMN_NAME_AB_META + ) } // TODO: This method seems to be used by Postgres and others while staging to local temp files. @@ -131,11 +146,14 @@ CSVPrinter(writer, CSVFormat.DEFAULT).use { csvPrinter -> for (record in records) { val uuid = UUID.randomUUID().toString() - // TODO we only need to do this is formatData is overridden. If not, we can just do jsonData = + // TODO we only need to do this if formatData is overridden. If not, we can just + // do jsonData = // record.getSerialized() - val jsonData = Jsons.serialize(formatData(Jsons.deserializeExact(record.serialized))) + val jsonData = + Jsons.serialize(formatData(Jsons.deserializeExact(record.serialized))) val airbyteMeta = Jsons.serialize(record.record!!.meta) - val extractedAt = Timestamp.from(Instant.ofEpochMilli(record.record!!.emittedAt)) + val extractedAt = + Timestamp.from(Instant.ofEpochMilli(record.record!!.emittedAt)) if (isDestinationV2) { csvPrinter.printRecord(uuid, jsonData, extractedAt, null, airbyteMeta) } else { @@ -150,29 +168,48 @@ return data } - override fun truncateTableQuery(database: JdbcDatabase?, schemaName: String?, tableName: String?): String? { + override fun truncateTableQuery( + database: JdbcDatabase?, + schemaName: String?, + tableName: String? + ): String { return String.format("TRUNCATE TABLE %s.%s;\n", schemaName, tableName) } - override fun insertTableQuery(database: JdbcDatabase?, schemaName: String?, srcTableName: String?, dstTableName: String?): String?
{ - return String.format("INSERT INTO %s.%s SELECT * FROM %s.%s;\n", schemaName, dstTableName, schemaName, srcTableName) + override fun insertTableQuery( + database: JdbcDatabase?, + schemaName: String?, + srcTableName: String?, + dstTableName: String? + ): String? { + return String.format( + "INSERT INTO %s.%s SELECT * FROM %s.%s;\n", + schemaName, + dstTableName, + schemaName, + srcTableName + ) } @Throws(Exception::class) - override fun executeTransaction(database: JdbcDatabase?, queries: List?) { + override fun executeTransaction(database: JdbcDatabase, queries: List) { val appendedQueries = StringBuilder() appendedQueries.append("BEGIN;\n") - for (query in queries!!) { + for (query in queries) { appendedQueries.append(query) } appendedQueries.append("COMMIT;") - database!!.execute(appendedQueries.toString()) + database.execute(appendedQueries.toString()) } @Throws(SQLException::class) - override fun dropTableIfExists(database: JdbcDatabase?, schemaName: String?, tableName: String?) { + override fun dropTableIfExists( + database: JdbcDatabase, + schemaName: String?, + tableName: String? + ) { try { - database!!.execute(dropTableIfExistsQuery(schemaName, tableName)) + database.execute(dropTableIfExistsQuery(schemaName, tableName)) } catch (e: SQLException) { throw checkForKnownConfigExceptions(e).orElseThrow { e } } @@ -190,16 +227,20 @@ abstract class JdbcSqlOperations : SqlOperations { } @Throws(Exception::class) - override fun insertRecords(database: JdbcDatabase?, - records: List?, - schemaName: String?, - tableName: String?) { + override fun insertRecords( + database: JdbcDatabase, + records: List, + schemaName: String?, + tableName: String? + ) { dataAdapter.ifPresent { adapter: DataAdapter -> - records!!.forEach(Consumer { airbyteRecordMessage: PartialAirbyteMessage? -> - val data = Jsons.deserializeExact(airbyteRecordMessage!!.serialized) - adapter.adapt(data) - airbyteRecordMessage.serialized = Jsons.serialize(data) - }) + records!!.forEach( + Consumer { airbyteRecordMessage: PartialAirbyteMessage? -> + val data = Jsons.deserializeExact(airbyteRecordMessage!!.serialized) + adapter.adapt(data) + airbyteRecordMessage.serialized = Jsons.serialize(data) + } + ) } if (isDestinationV2) { insertRecordsInternalV2(database, records, schemaName, tableName) @@ -209,16 +250,20 @@ abstract class JdbcSqlOperations : SqlOperations { } @Throws(Exception::class) - protected abstract fun insertRecordsInternal(database: JdbcDatabase?, - records: List?, - schemaName: String?, - tableName: String?) + protected abstract fun insertRecordsInternal( + database: JdbcDatabase, + records: List, + schemaName: String?, + tableName: String? + ) @Throws(Exception::class) - protected abstract fun insertRecordsInternalV2(database: JdbcDatabase?, - records: List?, - schemaName: String?, - tableName: String?) + protected abstract fun insertRecordsInternalV2( + database: JdbcDatabase, + records: List, + schemaName: String?, + tableName: String? 
+ ) companion object { protected const val SHOW_SCHEMAS: String = "show schemas;" diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt index 0dc15a5ed24f..32a779207b17 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt @@ -19,8 +19,8 @@ import java.util.function.Supplier object SqlOperationsUtils { /** - * Inserts "raw" records in a single query. The purpose of helper to abstract away database-specific - * SQL syntax from this query. + * Inserts "raw" records in a single query. The purpose of this helper is to abstract away + * database-specific SQL syntax from this query. * * @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id, * data, emitted_at) @@ -30,16 +30,25 @@ * @throws SQLException exception */ @Throws(SQLException::class) - fun insertRawRecordsInSingleQuery(insertQueryComponent: String?, - recordQueryComponent: String?, - jdbcDatabase: JdbcDatabase, - records: List) { - insertRawRecordsInSingleQuery(insertQueryComponent, recordQueryComponent, jdbcDatabase, records, { UUID.randomUUID() }, true) + fun insertRawRecordsInSingleQuery( + insertQueryComponent: String?, + recordQueryComponent: String?, + jdbcDatabase: JdbcDatabase, + records: List + ) { + insertRawRecordsInSingleQuery( + insertQueryComponent, + recordQueryComponent, + jdbcDatabase, + records, + { UUID.randomUUID() }, + true + ) } /** - * Inserts "raw" records in a single query. The purpose of helper to abstract away database-specific - * SQL syntax from this query. + * Inserts "raw" records in a single query. The purpose of this helper is to abstract away + * database-specific SQL syntax from this query. * * This version does not add a semicolon at the end of the INSERT statement.
     *
@@ -51,69 +60,92 @@ object SqlOperationsUtils {
     * @throws SQLException exception
     */
    @Throws(SQLException::class)
-    fun insertRawRecordsInSingleQueryNoSem(insertQueryComponent: String?,
-                                           recordQueryComponent: String?,
-                                           jdbcDatabase: JdbcDatabase,
-                                           records: List<PartialAirbyteMessage>) {
-        insertRawRecordsInSingleQuery(insertQueryComponent, recordQueryComponent, jdbcDatabase, records, { UUID.randomUUID() }, false)
+    fun insertRawRecordsInSingleQueryNoSem(
+        insertQueryComponent: String?,
+        recordQueryComponent: String?,
+        jdbcDatabase: JdbcDatabase,
+        records: List<PartialAirbyteMessage>
+    ) {
+        insertRawRecordsInSingleQuery(
+            insertQueryComponent,
+            recordQueryComponent,
+            jdbcDatabase,
+            records,
+            { UUID.randomUUID() },
+            false
+        )
    }

    @VisibleForTesting
    @Throws(SQLException::class)
-    fun insertRawRecordsInSingleQuery(insertQueryComponent: String?,
-                                      recordQueryComponent: String?,
-                                      jdbcDatabase: JdbcDatabase,
-                                      records: List<PartialAirbyteMessage>,
-                                      uuidSupplier: Supplier<UUID>,
-                                      sem: Boolean) {
+    fun insertRawRecordsInSingleQuery(
+        insertQueryComponent: String?,
+        recordQueryComponent: String?,
+        jdbcDatabase: JdbcDatabase,
+        records: List<PartialAirbyteMessage>,
+        uuidSupplier: Supplier<UUID>,
+        sem: Boolean
+    ) {
        if (records.isEmpty()) {
            return
        }
-        jdbcDatabase.execute(CheckedConsumer { connection: Connection ->
+        jdbcDatabase.execute(
+            CheckedConsumer { connection: Connection ->
 
-            // Strategy: We want to use PreparedStatement because it handles binding values to the SQL query
-            // (e.g. handling formatting timestamps). A PreparedStatement statement is created by supplying the
-            // full SQL string at creation time. Then subsequently specifying which values are bound to the
-            // string. Thus there will be two loops below.
-            // 1) Loop over records to build the full string.
-            // 2) Loop over the records and bind the appropriate values to the string.
-            // We also partition the query to run on 10k records at a time, since some DBs set a max limit on
-            // how many records can be inserted at once
-            // TODO(sherif) this should use a smarter, destination-aware partitioning scheme instead of 10k by
-            // default
-            for (partition in Iterables.partition(records, 10000)) {
-                val sql = StringBuilder(insertQueryComponent)
-                partition.forEach(Consumer { r: PartialAirbyteMessage? -> sql.append(recordQueryComponent) })
-                val s = sql.toString()
-                val s1 = s.substring(0, s.length - 2) + (if (sem) ";" else "")
+                // Strategy: We want to use PreparedStatement because it handles binding values to
+                // the SQL query
+                // (e.g. handling formatting timestamps). A PreparedStatement statement is created
+                // by supplying the
+                // full SQL string at creation time. Then subsequently specifying which values are
+                // bound to the
+                // string. Thus there will be two loops below.
+                // 1) Loop over records to build the full string.
+                // 2) Loop over the records and bind the appropriate values to the string.
+                // We also partition the query to run on 10k records at a time, since some DBs set a
+                // max limit on
+                // how many records can be inserted at once
+                // TODO(sherif) this should use a smarter, destination-aware partitioning scheme
+                // instead of 10k by
+                // default
+                for (partition in Iterables.partition(records, 10000)) {
+                    val sql = StringBuilder(insertQueryComponent)
+                    partition.forEach(
+                        Consumer { r: PartialAirbyteMessage? -> sql.append(recordQueryComponent) }
+                    )
+                    val s = sql.toString()
+                    val s1 = s.substring(0, s.length - 2) + (if (sem) ";" else "")
 
-                connection.prepareStatement(s1).use { statement ->
-                    // second loop: bind values to the SQL string.
-                    // 1-indexed
-                    var i = 1
-                    for (message in partition) {
-                        // Airbyte Raw ID
-                        statement.setString(i, uuidSupplier.get().toString())
-                        i++
-
-                        // Message Data
-                        statement.setString(i, message.serialized)
-                        i++
+                    connection.prepareStatement(s1).use { statement ->
+                        // second loop: bind values to the SQL string.
+                        // 1-indexed
+                        var i = 1
+                        for (message in partition) {
+                            // Airbyte Raw ID
+                            statement.setString(i, uuidSupplier.get().toString())
+                            i++
 
-                        // Extracted At
-                        statement.setTimestamp(i, Timestamp.from(Instant.ofEpochMilli(message.record!!.emittedAt)))
-                        i++
+                            // Message Data
+                            statement.setString(i, message.serialized)
+                            i++
 
-                        if (isDestinationV2) {
-                            // Loaded At
-                            statement.setTimestamp(i, null)
+                            // Extracted At
+                            statement.setTimestamp(
+                                i,
+                                Timestamp.from(Instant.ofEpochMilli(message.record!!.emittedAt))
+                            )
                             i++
+
+                            if (isDestinationV2) {
+                                // Loaded At
+                                statement.setTimestamp(i, null)
+                                i++
+                            }
                         }
+                        statement.execute()
                     }
-                    statement.execute()
                 }
             }
-        })
+        )
     }
 }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt
index 43645555c184..72309e3628aa 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TableDefinition.kt
@@ -3,12 +3,9 @@
  */
 package io.airbyte.cdk.integrations.destination.jdbc
 
-import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig.columns
-
 /**
  * Jdbc destination table definition representation with a map of column names to column definitions
  *
  * @param columns
  */
-@JvmRecord
-data class TableDefinition(val columns: LinkedHashMap<String, ColumnDefinition>)
+@JvmRecord data class TableDefinition(val columns: LinkedHashMap<String, ColumnDefinition>)
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt
index 04a0f9891263..fc3ef9ea4796 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/WriteConfig.kt
@@ -10,27 +10,36 @@ import java.time.Instant
  * Write configuration POJO (plain old java object) for all destinations extending
  * [AbstractJdbcDestination].
*/ -class WriteConfig @JvmOverloads constructor(val streamName: String, - /** - * This is used in [JdbcBufferedConsumerFactory] to verify that record is from expected - * streams - * - * @return - */ - val namespace: String, - val outputSchemaName: String?, - val tmpTableName: String, - val outputTableName: String?, - val syncMode: DestinationSyncMode, - val writeDatetime: Instant = Instant.now()) { +class WriteConfig +@JvmOverloads +constructor( + val streamName: String, + /** + * This is used in [JdbcBufferedConsumerFactory] to verify that record is from expected streams + * + * @return + */ + val namespace: String, + val outputSchemaName: String, + val tmpTableName: String?, + val outputTableName: String?, + val syncMode: DestinationSyncMode, + val writeDatetime: Instant = Instant.now() +) { override fun toString(): String { return "WriteConfig{" + - "streamName=" + streamName + - ", namespace=" + namespace + - ", outputSchemaName=" + outputSchemaName + - ", tmpTableName=" + tmpTableName + - ", outputTableName=" + outputTableName + - ", syncMode=" + syncMode + - '}' + "streamName=" + + streamName + + ", namespace=" + + namespace + + ", outputSchemaName=" + + outputSchemaName + + ", tmpTableName=" + + tmpTableName + + ", outputTableName=" + + outputTableName + + ", syncMode=" + + syncMode + + '}' } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt index 6eec329a6746..67470d9df119 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyConsumerFactory.kt @@ -14,59 +14,81 @@ import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations import io.airbyte.cdk.integrations.destination.jdbc.constants.GlobalDataSizeConstants import io.airbyte.cdk.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy import io.airbyte.protocol.models.v0.* -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.util.* import java.util.function.Consumer import javax.sql.DataSource +import org.slf4j.Logger +import org.slf4j.LoggerFactory object CopyConsumerFactory { private val LOGGER: Logger = LoggerFactory.getLogger(CopyConsumerFactory::class.java) - fun create(outputRecordCollector: Consumer?, - dataSource: DataSource, - database: JdbcDatabase, - sqlOperations: SqlOperations, - namingResolver: StandardNameTransformer, - config: T, - catalog: ConfiguredAirbyteCatalog, - streamCopierFactory: StreamCopierFactory, - defaultSchema: String): AirbyteMessageConsumer { - val pairToCopier = createWriteConfigs( + fun create( + outputRecordCollector: Consumer, + dataSource: DataSource, + database: JdbcDatabase, + sqlOperations: SqlOperations, + namingResolver: StandardNameTransformer, + config: T, + catalog: ConfiguredAirbyteCatalog, + streamCopierFactory: StreamCopierFactory, + defaultSchema: String + ): AirbyteMessageConsumer { + val pairToCopier = + createWriteConfigs( namingResolver, config, catalog, streamCopierFactory, defaultSchema, database, - sqlOperations) + sqlOperations + ) val pairToIgnoredRecordCount: MutableMap = HashMap() return BufferedStreamConsumer( - outputRecordCollector, - onStartFunction(pairToIgnoredRecordCount), - 
InMemoryRecordBufferingStrategy( - recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount), - removeStagingFilePrinter(pairToCopier), - GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES.toLong()), - onCloseFunction(pairToCopier, database, sqlOperations, pairToIgnoredRecordCount, dataSource), - catalog + outputRecordCollector, + onStartFunction(pairToIgnoredRecordCount), + InMemoryRecordBufferingStrategy( + recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount), + removeStagingFilePrinter(pairToCopier), + GlobalDataSizeConstants.DEFAULT_MAX_BATCH_SIZE_BYTES.toLong() + ), + onCloseFunction( + pairToCopier, + database, + sqlOperations, + pairToIgnoredRecordCount, + dataSource + ), + catalog ) { data: JsonNode? -> sqlOperations.isValidData(data) } } - private fun createWriteConfigs(namingResolver: StandardNameTransformer, - config: T, - catalog: ConfiguredAirbyteCatalog, - streamCopierFactory: StreamCopierFactory, - defaultSchema: String, - database: JdbcDatabase, - sqlOperations: SqlOperations): Map { + private fun createWriteConfigs( + namingResolver: StandardNameTransformer, + config: T, + catalog: ConfiguredAirbyteCatalog, + streamCopierFactory: StreamCopierFactory, + defaultSchema: String, + database: JdbcDatabase, + sqlOperations: SqlOperations + ): Map { val pairToCopier: MutableMap = HashMap() val stagingFolder = UUID.randomUUID().toString() for (configuredStream in catalog.streams) { val stream = configuredStream.stream val pair = AirbyteStreamNameNamespacePair.fromAirbyteStream(stream) - val copier = streamCopierFactory.create(defaultSchema, config, stagingFolder, configuredStream, namingResolver, database, sqlOperations) + val copier = + streamCopierFactory.create( + defaultSchema, + config, + stagingFolder, + configuredStream, + namingResolver, + database, + sqlOperations + ) pairToCopier[pair] = copier } @@ -74,14 +96,20 @@ object CopyConsumerFactory { return pairToCopier } - private fun onStartFunction(pairToIgnoredRecordCount: MutableMap): OnStartFunction { + private fun onStartFunction( + pairToIgnoredRecordCount: MutableMap + ): OnStartFunction { return OnStartFunction { pairToIgnoredRecordCount.clear() } } - private fun recordWriterFunction(pairToCopier: Map, - sqlOperations: SqlOperations, - pairToIgnoredRecordCount: MutableMap): RecordWriter { - return RecordWriter { pair: AirbyteStreamNameNamespacePair, records: List -> + private fun recordWriterFunction( + pairToCopier: Map, + sqlOperations: SqlOperations, + pairToIgnoredRecordCount: MutableMap + ): RecordWriter { + return RecordWriter { + pair: AirbyteStreamNameNamespacePair, + records: List -> val fileName = pairToCopier[pair]!!.prepareStagingFile() for (recordMessage in records) { val id = UUID.randomUUID() @@ -90,45 +118,64 @@ object CopyConsumerFactory { // or should we upload it into a special rejected record folder in s3 instead? pairToCopier[pair]!!.write(id, recordMessage, fileName) } else { - pairToIgnoredRecordCount[pair] = pairToIgnoredRecordCount.getOrDefault(pair, 0L) + 1L + pairToIgnoredRecordCount[pair] = + pairToIgnoredRecordCount.getOrDefault(pair, 0L) + 1L } } } } - private fun removeStagingFilePrinter(pairToCopier: Map): CheckAndRemoveRecordWriter { - return CheckAndRemoveRecordWriter { pair: AirbyteStreamNameNamespacePair?, stagingFileName: String? -> + private fun removeStagingFilePrinter( + pairToCopier: Map + ): CheckAndRemoveRecordWriter { + return CheckAndRemoveRecordWriter { + pair: AirbyteStreamNameNamespacePair?, + stagingFileName: String? 
-> val currentFileName = pairToCopier[pair]!!.currentFile - if (stagingFileName != null && currentFileName != null && stagingFileName != currentFileName) { + if ( + stagingFileName != null && + currentFileName != null && + stagingFileName != currentFileName + ) { pairToCopier[pair]!!.closeNonCurrentStagingFileWriters() } currentFileName } } - private fun onCloseFunction(pairToCopier: Map, - database: JdbcDatabase, - sqlOperations: SqlOperations, - pairToIgnoredRecordCount: Map, - dataSource: DataSource): OnCloseFunction { - return OnCloseFunction { hasFailed: Boolean, streamSyncSummaries: Map? -> - pairToIgnoredRecordCount - .forEach { (pair: AirbyteStreamNameNamespacePair?, count: Long?) -> LOGGER.warn("A total of {} record(s) of data from stream {} were invalid and were ignored.", count, pair) } + private fun onCloseFunction( + pairToCopier: Map, + database: JdbcDatabase, + sqlOperations: SqlOperations, + pairToIgnoredRecordCount: Map, + dataSource: DataSource + ): OnCloseFunction { + return OnCloseFunction { hasFailed: Boolean, _: Map? -> + pairToIgnoredRecordCount.forEach { (pair: AirbyteStreamNameNamespacePair?, count: Long?) + -> + LOGGER.warn( + "A total of {} record(s) of data from stream {} were invalid and were ignored.", + count, + pair + ) + } closeAsOneTransaction(pairToCopier, hasFailed, database, sqlOperations, dataSource) } } @Throws(Exception::class) - private fun closeAsOneTransaction(pairToCopier: Map, - hasFailed: Boolean, - db: JdbcDatabase, - sqlOperations: SqlOperations, - dataSource: DataSource) { + private fun closeAsOneTransaction( + pairToCopier: Map, + hasFailed: Boolean, + db: JdbcDatabase, + sqlOperations: SqlOperations, + dataSource: DataSource + ) { var hasFailed = hasFailed var firstException: Exception? = null val streamCopiers: List = ArrayList(pairToCopier.values) try { - val queries: MutableList = ArrayList() + val queries: MutableList = ArrayList() for (copier in streamCopiers) { try { copier!!.closeStagingUploader(hasFailed) @@ -141,7 +188,8 @@ object CopyConsumerFactory { queries.add(mergeQuery) } } catch (e: Exception) { - val message = String.format("Failed to finalize copy to temp table due to: %s", e) + val message = + String.format("Failed to finalize copy to temp table due to: %s", e) LOGGER.error(message) hasFailed = true if (firstException == null) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt index 49e88e9dad49..f8f11b717d1c 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/CopyDestination.kt @@ -16,14 +16,14 @@ import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination import io.airbyte.cdk.integrations.destination.jdbc.SqlOperations import io.airbyte.commons.exceptions.ConnectionErrorException import io.airbyte.protocol.models.v0.AirbyteConnectionStatus +import javax.sql.DataSource import org.slf4j.Logger import org.slf4j.LoggerFactory -import javax.sql.DataSource abstract class CopyDestination : BaseConnector, Destination { /** - * The default database schema field in the destination config is "schema". To change it, pass the - * field name to the constructor. 
+ * The default database schema field in the destination config is "schema". To change it, pass + * the field name to the constructor. */ private var schemaFieldName = "schema" @@ -34,11 +34,10 @@ abstract class CopyDestination : BaseConnector, Destination { } /** - * A self contained method for writing a file to the persistence for testing. This method should try - * to clean up after itself by deleting the file it creates. + * A self contained method for writing a file to the persistence for testing. This method should + * try to clean up after itself by deleting the file it creates. */ - @Throws(Exception::class) - abstract fun checkPersistence(config: JsonNode?) + @Throws(Exception::class) abstract fun checkPersistence(config: JsonNode?) abstract val nameTransformer: StandardNameTransformer @@ -54,11 +53,13 @@ abstract class CopyDestination : BaseConnector, Destination { } catch (e: Exception) { LOGGER.error("Exception attempting to access the staging persistence: ", e) return AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage(""" + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage( + """ Could not connect to the staging persistence with the provided configuration. ${e.message} - """.trimIndent()) + """.trimIndent() + ) } val dataSource = getDataSource(config) @@ -75,16 +76,18 @@ abstract class CopyDestination : BaseConnector, Destination { val message = getErrorMessage(ex.stateCode, ex.errorCode, ex.exceptionMessage, ex) emitConfigErrorTrace(ex, message) return AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage(message) + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage(message) } catch (e: Exception) { LOGGER.error("Exception attempting to connect to the warehouse: ", e) return AirbyteConnectionStatus() - .withStatus(AirbyteConnectionStatus.Status.FAILED) - .withMessage(""" + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage( + """ Could not connect to the warehouse with the provided configuration. 
${e.message}
-                    """.trimIndent())
+                    """.trimIndent()
+                )
         } finally {
             try {
                 close(dataSource)
@@ -95,10 +98,18 @@
     }
 
     @Throws(Exception::class)
-    protected fun performCreateInsertTestOnDestination(outputSchema: String?,
-                                                       database: JdbcDatabase,
-                                                       nameTransformer: NamingConventionTransformer) {
-        AbstractJdbcDestination.Companion.attemptTableOperations(outputSchema, database, nameTransformer, sqlOperations, true)
+    protected fun performCreateInsertTestOnDestination(
+        outputSchema: String?,
+        database: JdbcDatabase,
+        nameTransformer: NamingConventionTransformer
+    ) {
+        AbstractJdbcDestination.Companion.attemptTableOperations(
+            outputSchema,
+            database,
+            nameTransformer,
+            sqlOperations,
+            true
+        )
     }
 
     companion object {
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt
index fa6e4ecc6988..0f23c527c2a3 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestination.kt
@@ -12,11 +12,11 @@ import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer
 import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
 import io.airbyte.protocol.models.v0.AirbyteMessage
 import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
-import org.slf4j.Logger
-import org.slf4j.LoggerFactory
 import java.util.*
 import java.util.function.Consumer
 import java.util.function.Function
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
 
 /**
  * Multiple configs may allow you to sync data to the destination in multiple ways.
@@ -28,7 +28,11 @@ import java.util.function.Function
  * This class exists to make it easy to define a destination in terms of multiple other destination
  * implementations, switching between them based on the config provided.
  */
-class SwitchingDestination<T : Enum<T>?>(enumClass: Class<T>, configToType: Function<JsonNode, T>, typeToDestination: Map<T, Destination>) : BaseConnector(), Destination {
+class SwitchingDestination<T : Enum<T>>(
+    enumClass: Class<T>,
+    configToType: Function<JsonNode, T>,
+    typeToDestination: Map<T, Destination>
+) : BaseConnector(), Destination {
     private val configToType: Function<JsonNode, T>
     private val typeToDestination: Map<T, Destination>
 
@@ -51,21 +55,33 @@ class SwitchingDestination<T : Enum<T>?>(enumClass: Class<T>, configToType: Func
     }
 
     @Throws(Exception::class)
-    override fun getConsumer(config: JsonNode,
-                             catalog: ConfiguredAirbyteCatalog?,
-                             outputRecordCollector: Consumer<AirbyteMessage>?): AirbyteMessageConsumer? {
+    override fun getConsumer(
+        config: JsonNode,
+        catalog: ConfiguredAirbyteCatalog,
+        outputRecordCollector: Consumer<AirbyteMessage>
+    ): AirbyteMessageConsumer? {
         val destinationType = configToType.apply(config)
         LOGGER.info("Using destination type: " + destinationType!!.name)
-        return typeToDestination[destinationType]!!.getConsumer(config, catalog, outputRecordCollector)
+        return typeToDestination[destinationType]!!.getConsumer(
+            config,
+            catalog,
+            outputRecordCollector
+        )
     }
 
     @Throws(Exception::class)
-    override fun getSerializedMessageConsumer(config: JsonNode,
-                                              catalog: ConfiguredAirbyteCatalog?,
-                                              outputRecordCollector: Consumer<AirbyteMessage>?): SerializedAirbyteMessageConsumer?
{ + override fun getSerializedMessageConsumer( + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + outputRecordCollector: Consumer + ): SerializedAirbyteMessageConsumer? { val destinationType = configToType.apply(config) LOGGER.info("Using destination type: " + destinationType!!.name) - return typeToDestination[destinationType]!!.getSerializedMessageConsumer(config, catalog, outputRecordCollector) + return typeToDestination[destinationType]!!.getSerializedMessageConsumer( + config, + catalog, + outputRecordCollector + ) } companion object { diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt index 90762aea6a0c..3ed79a50e1c1 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt @@ -5,6 +5,7 @@ package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping import com.fasterxml.jackson.databind.JsonNode import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.cdk.integrations.destination.jdbc.ColumnDefinition import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil.getResultsOrLogAndThrowFirst @@ -15,15 +16,6 @@ import io.airbyte.commons.json.Jsons import io.airbyte.integrations.base.destination.typing_deduping.* import io.airbyte.integrations.base.destination.typing_deduping.Struct import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair -import lombok.extern.slf4j.Slf4j -import org.jooq.Condition -import org.jooq.DSLContext -import org.jooq.SQLDialect -import org.jooq.conf.ParamType -import org.jooq.impl.DSL -import org.jooq.impl.SQLDataType -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.sql.* import java.time.Instant import java.time.OffsetDateTime @@ -31,17 +23,27 @@ import java.time.temporal.ChronoUnit import java.util.* import java.util.concurrent.CompletableFuture import java.util.concurrent.CompletionStage -import java.util.function.BiConsumer import java.util.function.Function import java.util.function.Predicate -import java.util.function.Supplier import java.util.stream.Collectors +import kotlin.collections.LinkedHashMap +import lombok.extern.slf4j.Slf4j +import org.jooq.Condition +import org.jooq.DSLContext +import org.jooq.SQLDialect +import org.jooq.conf.ParamType +import org.jooq.impl.DSL +import org.jooq.impl.SQLDataType +import org.slf4j.Logger +import org.slf4j.LoggerFactory @Slf4j -abstract class JdbcDestinationHandler(protected val databaseName: String, - protected val jdbcDatabase: JdbcDatabase, - protected val rawTableSchemaName: String, - private val dialect: SQLDialect) : DestinationHandler { +abstract class JdbcDestinationHandler( + protected val databaseName: String, + protected val jdbcDatabase: JdbcDatabase, + protected val rawTableSchemaName: String, + private val dialect: SQLDialect +) : DestinationHandler { protected val dslContext: DSLContext get() = DSL.using(dialect) @@ -53,72 +55,112 @@ abstract class JdbcDestinationHandler(protected val databaseNa 
@Throws(Exception::class) private fun isFinalTableEmpty(id: StreamId): Boolean { return !jdbcDatabase.queryBoolean( - dslContext.select( - DSL.field(DSL.exists( - DSL.selectOne() - .from(DSL.name(id.finalNamespace, id.finalName)) - .limit(1)))) - .getSQL(ParamType.INLINED)) + dslContext + .select( + DSL.field( + DSL.exists( + DSL.selectOne().from(DSL.name(id.finalNamespace, id.finalName)).limit(1) + ) + ) + ) + .getSQL(ParamType.INLINED) + ) } @Throws(Exception::class) private fun getInitialRawTableState(id: StreamId): InitialRawTableStatus { - val tableExists = jdbcDatabase.executeMetadataQuery { dbmetadata: DatabaseMetaData? -> - LOGGER.info("Retrieving table from Db metadata: {} {} {}", databaseName, id.rawNamespace, id.rawName) - try { - dbmetadata!!.getTables(databaseName, id.rawNamespace, id.rawName, null).use { table -> - return@executeMetadataQuery table.next() + val tableExists = + jdbcDatabase.executeMetadataQuery { dbmetadata: DatabaseMetaData? -> + LOGGER.info( + "Retrieving table from Db metadata: {} {} {}", + databaseName, + id.rawNamespace, + id.rawName + ) + try { + dbmetadata!!.getTables(databaseName, id.rawNamespace, id.rawName, null).use { + table -> + return@executeMetadataQuery table.next() + } + } catch (e: SQLException) { + LOGGER.error("Failed to retrieve table info from metadata", e) + throw SQLRuntimeException(e) } - } catch (e: SQLException) { - LOGGER.error("Failed to retrieve table info from metadata", e) - throw SQLRuntimeException(e) } - } if (!tableExists) { - // There's no raw table at all. Therefore there are no unprocessed raw records, and this sync + // There's no raw table at all. Therefore there are no unprocessed raw records, and this + // sync // should not filter raw records by timestamp. return InitialRawTableStatus(false, false, Optional.empty()) } - jdbcDatabase.unsafeQuery( - CheckedFunction { conn: Connection -> + jdbcDatabase + .unsafeQuery( + CheckedFunction { conn: Connection -> conn.prepareStatement( - dslContext.select(DSL.field("MIN(_airbyte_extracted_at)").`as`("min_timestamp")) - .from(DSL.name(id.rawNamespace, id.rawName)) - .where(DSL.condition("_airbyte_loaded_at IS NULL")) - .sql) + dslContext + .select(DSL.field("MIN(_airbyte_extracted_at)").`as`("min_timestamp")) + .from(DSL.name(id.rawNamespace, id.rawName)) + .where(DSL.condition("_airbyte_loaded_at IS NULL")) + .sql + ) }, - CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") }).use { timestampStream -> - // Filter for nonNull values in case the query returned NULL (i.e. no unloaded records). - val minUnloadedTimestamp: Optional = timestampStream.filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }).findFirst() - if (minUnloadedTimestamp.isPresent) { - // Decrement by 1 second since timestamp precision varies between databases. - val ts = minUnloadedTimestamp - .map { obj: Timestamp -> obj.toInstant() } - .map { i: Instant -> i.minus(1, ChronoUnit.SECONDS) } - return InitialRawTableStatus(true, true, ts) + CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") } + ) + .use { timestampStream -> + // Filter for nonNull values in case the query returned NULL (i.e. no unloaded + // records). + val minUnloadedTimestamp: Optional = + timestampStream + .filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }) + .findFirst() + if (minUnloadedTimestamp.isPresent) { + // Decrement by 1 second since timestamp precision varies between databases. 
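// A minimal sketch of the cutoff computed below: the minimum unloaded
// _airbyte_extracted_at is backed off by one second so that records whose
// timestamps were truncated by the database are not skipped on the next sync.
fun resyncCutoff(minUnloaded: java.sql.Timestamp): java.time.Instant =
    minUnloaded.toInstant().minus(1, java.time.temporal.ChronoUnit.SECONDS)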
+ val ts = + minUnloadedTimestamp + .map { obj: Timestamp -> obj.toInstant() } + .map { i: Instant -> i.minus(1, ChronoUnit.SECONDS) } + return InitialRawTableStatus(true, true, ts) + } } - } - jdbcDatabase.unsafeQuery( - CheckedFunction { conn: Connection -> + jdbcDatabase + .unsafeQuery( + CheckedFunction { conn: Connection -> conn.prepareStatement( - dslContext.select(DSL.field("MAX(_airbyte_extracted_at)").`as`("min_timestamp")) - .from(DSL.name(id.rawNamespace, id.rawName)) - .sql) + dslContext + .select(DSL.field("MAX(_airbyte_extracted_at)").`as`("min_timestamp")) + .from(DSL.name(id.rawNamespace, id.rawName)) + .sql + ) }, - CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") }).use { timestampStream -> - // Filter for nonNull values in case the query returned NULL (i.e. no raw records at all). - val minUnloadedTimestamp: Optional = timestampStream.filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }).findFirst() - return InitialRawTableStatus(true, false, minUnloadedTimestamp.map { obj: Timestamp -> obj.toInstant() }) - } + CheckedFunction { record: ResultSet -> record.getTimestamp("min_timestamp") } + ) + .use { timestampStream -> + // Filter for nonNull values in case the query returned NULL (i.e. no raw records at + // all). + val minUnloadedTimestamp: Optional = + timestampStream + .filter(Predicate { obj: Timestamp? -> Objects.nonNull(obj) }) + .findFirst() + return InitialRawTableStatus( + true, + false, + minUnloadedTimestamp.map { obj: Timestamp -> obj.toInstant() } + ) + } } @Throws(Exception::class) - override fun execute(sql: Sql?) { - val transactions: List> = sql!!.transactions + override fun execute(sql: Sql) { + val transactions: List> = sql.transactions val queryId = UUID.randomUUID() for (transaction in transactions) { val transactionId = UUID.randomUUID() - LOGGER.info("Executing sql {}-{}: {}", queryId, transactionId, java.lang.String.join("\n", transaction)) + LOGGER.info( + "Executing sql {}-{}: {}", + queryId, + transactionId, + java.lang.String.join("\n", transaction) + ) val startTime = System.currentTimeMillis() try { @@ -128,26 +170,37 @@ abstract class JdbcDestinationHandler(protected val databaseNa throw e } - LOGGER.info("Sql {}-{} completed in {} ms", queryId, transactionId, System.currentTimeMillis() - startTime) + LOGGER.info( + "Sql {}-{} completed in {} ms", + queryId, + transactionId, + System.currentTimeMillis() - startTime + ) } } @Throws(Exception::class) - override fun gatherInitialState(streamConfigs: List?): List> { + override fun gatherInitialState( + streamConfigs: List + ): List> { // Use stream n/ns pair because we don't want to build the full StreamId here - val destinationStatesFuture = CompletableFuture.supplyAsync { - try { - return@supplyAsync allDestinationStates - } catch (e: SQLException) { - throw RuntimeException(e) + val destinationStatesFuture = + CompletableFuture.supplyAsync { + try { + return@supplyAsync allDestinationStates + } catch (e: SQLException) { + throw RuntimeException(e) + } } - } - val initialStates = streamConfigs!!.stream() - .map { streamConfig: StreamConfig? 
-> retrieveState(destinationStatesFuture, streamConfig) } + val initialStates = + streamConfigs + .stream() + .map { streamConfig: StreamConfig -> + retrieveState(destinationStatesFuture, streamConfig) + } .toList() - val states = - CompletableFutures.allOf(initialStates).toCompletableFuture().join() + val states = CompletableFutures.allOf(initialStates).toCompletableFuture().join() return getResultsOrLogAndThrowFirst("Failed to retrieve initial state", states) } @@ -156,57 +209,103 @@ abstract class JdbcDestinationHandler(protected val databaseNa get() { // Guarantee the table exists. jdbcDatabase.execute( - dslContext.createTableIfNotExists(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) - .column(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), SQLDataType.VARCHAR) - .column(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), SQLDataType.VARCHAR) // Just use a string type, even if the destination has a json type. - // We're never going to query this column in a fancy way - all our processing can happen - // client-side. - .column(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), SQLDataType.VARCHAR) // Add an updated_at field. We don't actually need it yet, but it can't hurt! - .column(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), SQLDataType.TIMESTAMPWITHTIMEZONE) - .getSQL(ParamType.INLINED)) - // Fetch all records from it. We _could_ filter down to just our streams... but meh. This is small + dslContext + .createTableIfNotExists( + DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME) + ) + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), + SQLDataType.VARCHAR + ) + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), + SQLDataType.VARCHAR + ) // Just use a string type, even if the destination has a json type. + // We're never going to query this column in a fancy way - all our processing + // can happen + // client-side. + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), + SQLDataType.VARCHAR + ) // Add an updated_at field. We don't actually need it yet, but it can't hurt! + .column( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), + SQLDataType.TIMESTAMPWITHTIMEZONE + ) + .getSQL(ParamType.INLINED) + ) + // Fetch all records from it. We _could_ filter down to just our streams... but meh. + // This is small // data. 
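// A standalone sketch of the state-table DDL pattern used above, assuming jOOQ on
// the classpath; the schema, table and column names here are stand-ins rather
// than the CDK's actual constants.
import org.jooq.SQLDialect
import org.jooq.conf.ParamType
import org.jooq.impl.DSL
import org.jooq.impl.SQLDataType

fun stateTableDdl(schema: String): String =
    DSL.using(SQLDialect.DEFAULT)
        .createTableIfNotExists(DSL.quotedName(schema, "_airbyte_destination_state"))
        .column(DSL.quotedName("name"), SQLDataType.VARCHAR)
        .column(DSL.quotedName("namespace"), SQLDataType.VARCHAR)
        // state is stored as a plain string; JSON handling stays client-side
        .column(DSL.quotedName("destination_state"), SQLDataType.VARCHAR)
        .column(DSL.quotedName("updated_at"), SQLDataType.TIMESTAMPWITHTIMEZONE)
        .getSQL(ParamType.INLINED)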
- return jdbcDatabase.queryJsons( - dslContext.select( + return jdbcDatabase + .queryJsons( + dslContext + .select( DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)), DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE)), - DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE))).from(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) - .sql) - .stream().collect(Collectors.toMap( - Function { record: JsonNode -> - val nameNode = record[DESTINATION_STATE_TABLE_COLUMN_NAME] - val namespaceNode = record[DESTINATION_STATE_TABLE_COLUMN_NAMESPACE] - AirbyteStreamNameNamespacePair( - nameNode?.asText(), - namespaceNode?.asText()) - }, - Function { record: JsonNode -> - val stateNode = record[DESTINATION_STATE_TABLE_COLUMN_STATE] - val state = if (stateNode != null) Jsons.deserialize(stateNode.asText()) else Jsons.emptyObject() - toDestinationState(state) - })) + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE)) + ) + .from(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + .sql + ) + .stream() + .collect( + Collectors.toMap( + Function { record: JsonNode -> + val nameNode = record[DESTINATION_STATE_TABLE_COLUMN_NAME] + val namespaceNode = record[DESTINATION_STATE_TABLE_COLUMN_NAMESPACE] + AirbyteStreamNameNamespacePair( + nameNode?.asText(), + namespaceNode?.asText() + ) + }, + Function { record: JsonNode -> + val stateNode = record[DESTINATION_STATE_TABLE_COLUMN_STATE] + val state = + if (stateNode != null) Jsons.deserialize(stateNode.asText()) + else Jsons.emptyObject() + toDestinationState(state) + } + ) + ) } - private fun retrieveState(destinationStatesFuture: CompletableFuture>, - streamConfig: StreamConfig?): CompletionStage> { - return destinationStatesFuture.thenApply { destinationStates: Map -> + private fun retrieveState( + destinationStatesFuture: + CompletableFuture>, + streamConfig: StreamConfig? + ): CompletionStage> { + return destinationStatesFuture.thenApply { + destinationStates: Map -> try { val finalTableDefinition = findExistingTable(streamConfig!!.id) val isSchemaMismatch: Boolean val isFinalTableEmpty: Boolean if (finalTableDefinition.isPresent) { - isSchemaMismatch = !existingSchemaMatchesStreamConfig(streamConfig, finalTableDefinition.get()) + isSchemaMismatch = + !existingSchemaMatchesStreamConfig(streamConfig, finalTableDefinition.get()) isFinalTableEmpty = isFinalTableEmpty(streamConfig.id) } else { - // If the final table doesn't exist, then by definition it doesn't have a schema mismatch and has no + // If the final table doesn't exist, then by definition it doesn't have a schema + // mismatch and has no // records. 
isSchemaMismatch = false isFinalTableEmpty = true } val initialRawTableState = getInitialRawTableState(streamConfig.id) - val destinationState = destinationStates.getOrDefault(streamConfig.id.asPair(), toDestinationState(Jsons.emptyObject())) - return@thenApply DestinationInitialStatus(streamConfig, finalTableDefinition.isPresent, initialRawTableState, - isSchemaMismatch, isFinalTableEmpty, destinationState) + val destinationState = + destinationStates.getOrDefault( + streamConfig.id.asPair(), + toDestinationState(Jsons.emptyObject()) + ) + return@thenApply DestinationInitialStatus( + streamConfig, + finalTableDefinition.isPresent, + initialRawTableState, + isSchemaMismatch, + isFinalTableEmpty, + destinationState + ) } catch (e: Exception) { throw RuntimeException(e) } @@ -214,74 +313,137 @@ abstract class JdbcDestinationHandler(protected val databaseNa } private fun isAirbyteRawIdColumnMatch(existingTable: TableDefinition): Boolean { - return existingTable.columns.containsKey(COLUMN_NAME_AB_RAW_ID) && toJdbcTypeName(AirbyteProtocolType.STRING) == existingTable.columns[COLUMN_NAME_AB_RAW_ID]!!.type + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID) && + toJdbcTypeName(AirbyteProtocolType.STRING) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_RAW_ID]!!.type } private fun isAirbyteExtractedAtColumnMatch(existingTable: TableDefinition): Boolean { - return existingTable.columns.containsKey(COLUMN_NAME_AB_EXTRACTED_AT) && toJdbcTypeName(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE) == existingTable.columns[COLUMN_NAME_AB_EXTRACTED_AT]!!.type + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT) && + toJdbcTypeName(AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT]!!.type } private fun isAirbyteMetaColumnMatch(existingTable: TableDefinition): Boolean { - return existingTable.columns.containsKey(COLUMN_NAME_AB_META) && toJdbcTypeName(Struct(java.util.LinkedHashMap())) == existingTable.columns[COLUMN_NAME_AB_META]!!.type + return existingTable.columns.containsKey(JavaBaseConstants.COLUMN_NAME_AB_META) && + toJdbcTypeName(Struct(java.util.LinkedHashMap())) == + existingTable.columns[JavaBaseConstants.COLUMN_NAME_AB_META]!!.type } - protected fun existingSchemaMatchesStreamConfig(stream: StreamConfig?, existingTable: TableDefinition): Boolean { + protected fun existingSchemaMatchesStreamConfig( + stream: StreamConfig?, + existingTable: TableDefinition + ): Boolean { // Check that the columns match, with special handling for the metadata columns. - if (!isAirbyteRawIdColumnMatch(existingTable) || + if ( + !isAirbyteRawIdColumnMatch(existingTable) || !isAirbyteExtractedAtColumnMatch(existingTable) || - !isAirbyteMetaColumnMatch(existingTable)) { - // Missing AB meta columns from final table, we need them to do proper T+D so trigger soft-reset + !isAirbyteMetaColumnMatch(existingTable) + ) { + // Missing AB meta columns from final table, we need them to do proper T+D so trigger + // soft-reset return false } - val intendedColumns = stream!!.columns.entries.stream() - .collect(Supplier { LinkedHashMap() }, - BiConsumer, Map.Entry> { map: java.util.LinkedHashMap, column: Map.Entry -> map[column.key.name] = toJdbcTypeName(column.value) }, - BiConsumer { obj: java.util.LinkedHashMap, m: java.util.LinkedHashMap? -> obj.putAll(m!!) 
}) + val intendedColumns = + LinkedHashMap( + stream!!.columns!!.entries.associate { it.key.name to toJdbcTypeName(it.value) } + ) // Filter out Meta columns since they don't exist in stream config. - val actualColumns = existingTable.columns.entries.stream() + val actualColumns = + existingTable.columns.entries + .stream() .filter { column: Map.Entry -> - V2_FINAL_TABLE_METADATA_COLUMNS.stream() - .noneMatch(Predicate { airbyteColumnName: String -> airbyteColumnName == column.key }) + JavaBaseConstants.V2_FINAL_TABLE_METADATA_COLUMNS.stream() + .noneMatch( + Predicate { airbyteColumnName: String -> + airbyteColumnName == column.key + } + ) } - .collect({ LinkedHashMap() }, - { map: java.util.LinkedHashMap, column: Map.Entry -> map[column.key] = column.value.type }, - { obj: java.util.LinkedHashMap, m: java.util.LinkedHashMap? -> obj.putAll(m!!) }) + .collect( + { LinkedHashMap() }, + { + map: java.util.LinkedHashMap, + column: Map.Entry -> + map[column.key] = column.value.type + }, + { + obj: java.util.LinkedHashMap, + m: java.util.LinkedHashMap? -> + obj.putAll(m!!) + } + ) return actualColumns == intendedColumns } @Throws(Exception::class) - override fun commitDestinationStates(destinationStates: Map?) { - if (destinationStates!!.isEmpty()) { + override fun commitDestinationStates(destinationStates: Map) { + if (destinationStates.isEmpty()) { return } // Delete all state records where the stream name+namespace match one of our states - val deleteStates = dslContext.deleteFrom(DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME))) - .where(destinationStates.keys.stream() - .map { streamId: StreamId? -> - DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)).eq(streamId!!.originalName) - .and(DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE)).eq(streamId.originalNamespace)) + val deleteStates = + dslContext + .deleteFrom( + DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + ) + .where( + destinationStates.keys + .stream() + .map { streamId: StreamId -> + DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME)) + .eq(streamId.originalName) + .and( + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE) + ) + .eq(streamId.originalNamespace) + ) } - .reduce( - DSL.falseCondition() - ) { obj: Condition, arg2: Condition? -> obj.or(arg2) }) + .reduce(DSL.falseCondition()) { obj: Condition, arg2: Condition? -> + obj.or(arg2) + } + ) .getSQL(ParamType.INLINED) // Reinsert all of our states var insertStatesStep = - dslContext.insertInto(DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME))) - .columns( - DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), String::class.java), - DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), String::class.java), - DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), String::class.java), // This field is a timestamptz, but it's easier to just insert a string - // and assume the destination can cast it appropriately. - // Destination-specific timestamp syntax is weird and annoying. 
- DSL.field(DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), String::class.java)) + dslContext + .insertInto( + DSL.table(DSL.quotedName(rawTableSchemaName, DESTINATION_STATE_TABLE_NAME)) + ) + .columns( + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAME), + String::class.java + ), + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_NAMESPACE), + String::class.java + ), + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_STATE), + String::class.java + ), // This field is a timestamptz, but it's easier to just insert a string + // and assume the destination can cast it appropriately. + // Destination-specific timestamp syntax is weird and annoying. + DSL.field( + DSL.quotedName(DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT), + String::class.java + ) + ) for ((streamId, value) in destinationStates) { val stateJson = Jsons.serialize(value) - insertStatesStep = insertStatesStep.values(streamId!!.originalName, streamId.originalNamespace, stateJson, OffsetDateTime.now().toString()) + insertStatesStep = + insertStatesStep.values( + streamId!!.originalName, + streamId.originalNamespace, + stateJson, + OffsetDateTime.now().toString() + ) } val insertStates = insertStatesStep.getSQL(ParamType.INLINED) @@ -307,32 +469,54 @@ abstract class JdbcDestinationHandler(protected val databaseNa private const val DESTINATION_STATE_TABLE_COLUMN_UPDATED_AT = "updated_at" @Throws(SQLException::class) - fun findExistingTable(jdbcDatabase: JdbcDatabase, - databaseName: String?, - schemaName: String?, - tableName: String?): Optional { - val retrievedColumnDefns = jdbcDatabase.executeMetadataQuery { dbMetadata: DatabaseMetaData? -> + fun findExistingTable( + jdbcDatabase: JdbcDatabase, + databaseName: String?, + schemaName: String?, + tableName: String? + ): Optional { + val retrievedColumnDefns = + jdbcDatabase.executeMetadataQuery { dbMetadata: DatabaseMetaData? -> - // TODO: normalize namespace and finalName strings to quoted-lowercase (as needed. Snowflake - // requires uppercase) - val columnDefinitions = java.util.LinkedHashMap() - LOGGER.info("Retrieving existing columns for {}.{}.{}", databaseName, schemaName, tableName) - try { - dbMetadata!!.getColumns(databaseName, schemaName, tableName, null).use { columns -> - while (columns.next()) { - val columnName = columns.getString("COLUMN_NAME") - val typeName = columns.getString("TYPE_NAME") - val columnSize = columns.getInt("COLUMN_SIZE") - val isNullable = columns.getString("IS_NULLABLE") - columnDefinitions[columnName] = ColumnDefinition(columnName, typeName, columnSize, fromIsNullableIsoString(isNullable)) + // TODO: normalize namespace and finalName strings to quoted-lowercase (as + // needed. 
Snowflake + // requires uppercase) + val columnDefinitions = java.util.LinkedHashMap() + LOGGER.info( + "Retrieving existing columns for {}.{}.{}", + databaseName, + schemaName, + tableName + ) + try { + dbMetadata!!.getColumns(databaseName, schemaName, tableName, null).use { + columns -> + while (columns.next()) { + val columnName = columns.getString("COLUMN_NAME") + val typeName = columns.getString("TYPE_NAME") + val columnSize = columns.getInt("COLUMN_SIZE") + val isNullable = columns.getString("IS_NULLABLE") + columnDefinitions[columnName] = + ColumnDefinition( + columnName, + typeName, + columnSize, + fromIsNullableIsoString(isNullable) + ) + } } + } catch (e: SQLException) { + LOGGER.error( + "Failed to retrieve column info for {}.{}.{}", + databaseName, + schemaName, + tableName, + e + ) + throw SQLRuntimeException(e) } - } catch (e: SQLException) { - LOGGER.error("Failed to retrieve column info for {}.{}.{}", databaseName, schemaName, tableName, e) - throw SQLRuntimeException(e) + columnDefinitions } - columnDefinitions - } // Guard to fail fast if (retrievedColumnDefns.isEmpty()) { return Optional.empty() diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt index 2048adbe27ac..060e0b484f0c 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.kt @@ -4,16 +4,14 @@ package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping import com.google.common.annotations.VisibleForTesting +import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.cdk.integrations.destination.NamingConventionTransformer import io.airbyte.integrations.base.destination.typing_deduping.* +import io.airbyte.integrations.base.destination.typing_deduping.Array import io.airbyte.integrations.base.destination.typing_deduping.Sql.Companion.of import io.airbyte.integrations.base.destination.typing_deduping.Sql.Companion.transactionally import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName import io.airbyte.protocol.models.v0.DestinationSyncMode -import org.jooq.* -import org.jooq.conf.ParamType -import org.jooq.impl.DSL -import org.jooq.impl.SQLDataType import java.sql.Timestamp import java.time.Instant import java.util.* @@ -26,26 +24,37 @@ import kotlin.Int import kotlin.String import kotlin.UnsupportedOperationException import kotlin.plus +import org.jooq.* +import org.jooq.conf.ParamType +import org.jooq.impl.DSL +import org.jooq.impl.SQLDataType -abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventionTransformer) : SqlGenerator { +abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventionTransformer) : + SqlGenerator { protected val cdcDeletedAtColumn: ColumnId = buildColumnId("_ab_cdc_deleted_at") - override fun buildStreamId(namespace: String?, name: String?, rawNamespaceOverride: String?): StreamId { + override fun buildStreamId( + namespace: String, + name: String, + rawNamespaceOverride: String + ): StreamId { return StreamId( - namingTransformer.getNamespace(namespace!!), - 
namingTransformer.convertStreamName(name!!), - namingTransformer.getNamespace(rawNamespaceOverride!!), - namingTransformer.convertStreamName(concatenateRawTableName(namespace, name)), - namespace, - name) + namingTransformer.getNamespace(namespace), + namingTransformer.convertStreamName(name), + namingTransformer.getNamespace(rawNamespaceOverride), + namingTransformer.convertStreamName(concatenateRawTableName(namespace, name)), + namespace, + name + ) } - override fun buildColumnId(name: String?, suffix: String?): ColumnId { + override fun buildColumnId(name: String, suffix: String?): ColumnId { val nameWithSuffix = name + suffix return ColumnId( - namingTransformer.getIdentifier(nameWithSuffix), - name!!, - namingTransformer.getIdentifier(nameWithSuffix)) + namingTransformer.getIdentifier(nameWithSuffix), + name, + namingTransformer.getIdentifier(nameWithSuffix) + ) } protected fun toDialectType(type: AirbyteType): DataType<*> { @@ -53,7 +62,8 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio return toDialectType(type) } return when (type.typeName) { - Struct.TYPE, UnsupportedOneOf.TYPE -> structType + Struct.TYPE, + UnsupportedOneOf.TYPE -> structType Array.TYPE -> arrayType!! Union.TYPE -> toDialectType((type as Union).chooseType()) else -> throw IllegalArgumentException("Unsupported AirbyteType: $type") @@ -61,7 +71,7 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio } @VisibleForTesting - fun toDialectType(airbyteProtocolType: AirbyteProtocolType?): DataType<*> { + fun toDialectType(airbyteProtocolType: AirbyteProtocolType): DataType<*> { return when (airbyteProtocolType) { AirbyteProtocolType.STRING -> SQLDataType.VARCHAR(65535) AirbyteProtocolType.NUMBER -> SQLDataType.DECIMAL(38, 9) @@ -98,7 +108,10 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio * @param useExpensiveSaferCasting * @return a list of jooq fields for the final table insert statement. */ - protected abstract fun extractRawDataFields(columns: LinkedHashMap?, useExpensiveSaferCasting: Boolean): MutableList> + protected abstract fun extractRawDataFields( + columns: LinkedHashMap, + useExpensiveSaferCasting: Boolean + ): MutableList> /** * @@ -106,25 +119,30 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio * column * @return */ - protected abstract fun buildAirbyteMetaColumn(columns: LinkedHashMap?): Field<*>? + protected abstract fun buildAirbyteMetaColumn( + columns: LinkedHashMap + ): Field<*>? /** - * Get the cdc_deleted_at column condition for append_dedup mode by extracting it from _airbyte_data - * column in raw table. + * Get the cdc_deleted_at column condition for append_dedup mode by extracting it from + * _airbyte_data column in raw table. * * @return */ protected abstract fun cdcDeletedAtNotNullCondition(): Condition? /** - * Get the window step function row_number() over (partition by primary_key order by cursor_field) - * as row_number. + * Get the window step function row_number() over (partition by primary_key order by + * cursor_field) as row_number. 
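// A sketch of one way a dialect could implement getRowNumber() below, assuming
// jOOQ's window API and the CDK's ColumnId type; ordering newest cursor value
// (then newest extraction time) first makes row_number = 1 the record that
// survives dedup. Imports of DSL, Field, SortField and ColumnId are omitted.
fun rowNumberSketch(primaryKey: List<ColumnId>, cursorField: Optional<ColumnId>): Field<Int> {
    val pkFields = primaryKey.map { DSL.field(DSL.quotedName(it.name)) }
    val orderFields = mutableListOf<SortField<*>>()
    cursorField.ifPresent { orderFields.add(DSL.field(DSL.quotedName(it.name)).desc()) }
    orderFields.add(DSL.field(DSL.quotedName("_airbyte_extracted_at")).desc())
    return DSL.rowNumber().over().partitionBy(pkFields).orderBy(orderFields).`as`("row_number")
}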
* * @param primaryKey list of primary keys * @param cursorField cursor field used for ordering * @return */ - protected abstract fun getRowNumber(primaryKey: List?, cursorField: Optional?): Field + protected abstract fun getRowNumber( + primaryKey: List?, + cursorField: Optional + ): Field protected val dslContext: DSLContext get() = DSL.using(dialect) @@ -137,12 +155,24 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio * @return */ @VisibleForTesting - fun buildFinalTableFields(columns: LinkedHashMap, metaColumns: Map?>): List> { + fun buildFinalTableFields( + columns: LinkedHashMap, + metaColumns: Map?> + ): List> { val fields = - metaColumns.entries.stream().map { metaColumn: Map.Entry?> -> DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) }.collect(Collectors.toList()) + metaColumns.entries + .stream() + .map { metaColumn: Map.Entry?> -> + DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) + } + .collect(Collectors.toList()) val dataFields = - columns.entries.stream().map { column: Map.Entry -> DSL.field(DSL.quotedName(column.key!!.name), toDialectType(column.value)) }.collect( - Collectors.toList()) + columns.entries + .stream() + .map { column: Map.Entry -> + DSL.field(DSL.quotedName(column.key!!.name), toDialectType(column.value)) + } + .collect(Collectors.toList()) dataFields.addAll(fields) return dataFields } @@ -155,26 +185,36 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio */ fun getFinalTableMetaColumns(includeMetaColumn: Boolean): LinkedHashMap?> { val metaColumns = LinkedHashMap?>() - metaColumns[COLUMN_NAME_AB_RAW_ID] = SQLDataType.VARCHAR(36).nullable(false) - metaColumns[COLUMN_NAME_AB_EXTRACTED_AT] = timestampWithTimeZoneType.nullable(false) - if (includeMetaColumn) metaColumns[COLUMN_NAME_AB_META] = structType.nullable(false) + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_RAW_ID] = + SQLDataType.VARCHAR(36).nullable(false) + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT] = + timestampWithTimeZoneType.nullable(false) + if (includeMetaColumn) + metaColumns[JavaBaseConstants.COLUMN_NAME_AB_META] = structType.nullable(false) return metaColumns } /** - * build jooq fields for raw table with type-casted data columns first and then meta columns without - * _airbyte_meta. + * build jooq fields for raw table with type-casted data columns first and then meta columns + * without _airbyte_meta. 
* * @param columns * @param metaColumns * @return */ @VisibleForTesting - fun buildRawTableSelectFields(columns: LinkedHashMap?, - metaColumns: Map?>, - useExpensiveSaferCasting: Boolean): List> { + fun buildRawTableSelectFields( + columns: LinkedHashMap, + metaColumns: Map?>, + useExpensiveSaferCasting: Boolean + ): List> { val fields = - metaColumns.entries.stream().map { metaColumn: Map.Entry?> -> DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) }.collect(Collectors.toList()) + metaColumns.entries + .stream() + .map { metaColumn: Map.Entry?> -> + DSL.field(DSL.quotedName(metaColumn.key), metaColumn.value) + } + .collect(Collectors.toList()) // Use originalName with non-sanitized characters when extracting data from _airbyte_data val dataFields = extractRawDataFields(columns, useExpensiveSaferCasting) dataFields.addAll(fields) @@ -182,15 +222,24 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio } @VisibleForTesting - fun rawTableCondition(syncMode: DestinationSyncMode, isCdcDeletedAtPresent: Boolean, minRawTimestamp: Optional): Condition { - var condition: Condition = DSL.field(DSL.name(COLUMN_NAME_AB_LOADED_AT)).isNull() + fun rawTableCondition( + syncMode: DestinationSyncMode, + isCdcDeletedAtPresent: Boolean, + minRawTimestamp: Optional + ): Condition { + var condition: Condition = + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)).isNull() if (syncMode == DestinationSyncMode.APPEND_DEDUP) { if (isCdcDeletedAtPresent) { condition = condition.or(cdcDeletedAtNotNullCondition()) } } if (minRawTimestamp.isPresent) { - condition = condition.and(DSL.field(DSL.name(COLUMN_NAME_AB_EXTRACTED_AT)).gt(minRawTimestamp.get().toString())) + condition = + condition.and( + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)) + .gt(minRawTimestamp.get().toString()) + ) } return condition } @@ -199,156 +248,255 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio return of(createSchemaSql(schema)) } - override fun createTable(stream: StreamConfig?, suffix: String?, force: Boolean): Sql { + override fun createTable(stream: StreamConfig, suffix: String, force: Boolean): Sql { // TODO: Use Naming transformer to sanitize these strings with redshift restrictions. - val finalTableIdentifier = stream!!.id.finalName + suffix!!.lowercase(Locale.getDefault()) + val finalTableIdentifier = stream.id.finalName + suffix.lowercase(Locale.getDefault()) if (!force) { - return transactionally(Stream.concat( - Stream.of(createTableSql(stream.id.finalNamespace, finalTableIdentifier, stream.columns)), - createIndexSql(stream, suffix).stream()).toList()) + return transactionally( + Stream.concat( + Stream.of( + createTableSql( + stream.id.finalNamespace, + finalTableIdentifier, + stream.columns!! + ) + ), + createIndexSql(stream, suffix).stream() + ) + .toList() + ) } - return transactionally(Stream.concat( - Stream.of( - DSL.dropTableIfExists(DSL.quotedName(stream.id.finalNamespace, finalTableIdentifier)).getSQL(ParamType.INLINED), - createTableSql(stream.id.finalNamespace, finalTableIdentifier, stream.columns)), - createIndexSql(stream, suffix).stream()).toList()) + return transactionally( + Stream.concat( + Stream.of( + DSL.dropTableIfExists( + DSL.quotedName(stream.id.finalNamespace, finalTableIdentifier) + ) + .getSQL(ParamType.INLINED), + createTableSql( + stream.id.finalNamespace, + finalTableIdentifier, + stream.columns!! 
+ ) + ), + createIndexSql(stream, suffix).stream() + ) + .toList() + ) } - override fun updateTable(streamConfig: StreamConfig?, - finalSuffix: String?, - minRawTimestamp: Optional, - useExpensiveSaferCasting: Boolean): Sql? { + override fun updateTable( + streamConfig: StreamConfig, + finalSuffix: String?, + minRawTimestamp: Optional, + useExpensiveSaferCasting: Boolean + ): Sql { // TODO: Add flag to use merge vs insert/delete - return insertAndDeleteTransaction(streamConfig, finalSuffix, minRawTimestamp, useExpensiveSaferCasting) + return insertAndDeleteTransaction( + streamConfig, + finalSuffix, + minRawTimestamp, + useExpensiveSaferCasting + ) } - override fun overwriteFinalTable(stream: StreamId?, finalSuffix: String?): Sql? { + override fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql { return transactionally( - DSL.dropTableIfExists(DSL.name(stream!!.finalNamespace, stream.finalName)).getSQL(ParamType.INLINED), - DSL.alterTable(DSL.name(stream.finalNamespace, stream.finalName + finalSuffix)) - .renameTo(DSL.name(stream.finalName)) - .sql) + DSL.dropTableIfExists(DSL.name(stream.finalNamespace, stream.finalName)) + .getSQL(ParamType.INLINED), + DSL.alterTable(DSL.name(stream.finalNamespace, stream.finalName + finalSuffix)) + .renameTo(DSL.name(stream.finalName)) + .sql + ) } - override fun migrateFromV1toV2(streamId: StreamId?, namespace: String?, tableName: String?): Sql? { - val rawTableName = DSL.name(streamId!!.rawNamespace, streamId.rawName) + override fun migrateFromV1toV2( + streamId: StreamId, + namespace: String?, + tableName: String? + ): Sql { + val rawTableName = DSL.name(streamId.rawNamespace, streamId.rawName) val dsl = dslContext return transactionally( - dsl.createSchemaIfNotExists(streamId.rawNamespace).sql, - dsl.dropTableIfExists(rawTableName).sql, - DSL.createTable(rawTableName) - .column(COLUMN_NAME_AB_RAW_ID, SQLDataType.VARCHAR(36).nullable(false)) - .column(COLUMN_NAME_AB_EXTRACTED_AT, timestampWithTimeZoneType.nullable(false)) - .column(COLUMN_NAME_AB_LOADED_AT, timestampWithTimeZoneType.nullable(true)) - .column(COLUMN_NAME_DATA, structType.nullable(false)) - .column(COLUMN_NAME_AB_META, structType.nullable(true)) - .`as`(DSL.select( - DSL.field(COLUMN_NAME_AB_ID).`as`(COLUMN_NAME_AB_RAW_ID), - DSL.field(COLUMN_NAME_EMITTED_AT).`as`(COLUMN_NAME_AB_EXTRACTED_AT), - DSL.cast(null, timestampWithTimeZoneType).`as`(COLUMN_NAME_AB_LOADED_AT), - DSL.field(COLUMN_NAME_DATA).`as`(COLUMN_NAME_DATA), - DSL.cast(null, structType).`as`(COLUMN_NAME_AB_META)).from(DSL.table(DSL.name(namespace, tableName)))) - .getSQL(ParamType.INLINED)) + dsl.createSchemaIfNotExists(streamId.rawNamespace).sql, + dsl.dropTableIfExists(rawTableName).sql, + DSL.createTable(rawTableName) + .column( + JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, + SQLDataType.VARCHAR(36).nullable(false) + ) + .column( + JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, + timestampWithTimeZoneType.nullable(false) + ) + .column( + JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT, + timestampWithTimeZoneType.nullable(true) + ) + .column(JavaBaseConstants.COLUMN_NAME_DATA, structType.nullable(false)) + .column(JavaBaseConstants.COLUMN_NAME_AB_META, structType.nullable(true)) + .`as`( + DSL.select( + DSL.field(JavaBaseConstants.COLUMN_NAME_AB_ID) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID), + DSL.field(JavaBaseConstants.COLUMN_NAME_EMITTED_AT) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT), + DSL.cast(null, timestampWithTimeZoneType) + .`as`(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT), + 
DSL.field(JavaBaseConstants.COLUMN_NAME_DATA) + .`as`(JavaBaseConstants.COLUMN_NAME_DATA), + DSL.cast(null, structType).`as`(JavaBaseConstants.COLUMN_NAME_AB_META) + ) + .from(DSL.table(DSL.name(namespace, tableName))) + ) + .getSQL(ParamType.INLINED) + ) } - override fun clearLoadedAt(streamId: StreamId?): Sql { - return of(DSL.update(DSL.table(DSL.name(streamId!!.rawNamespace, streamId.rawName))) - .set(DSL.field(COLUMN_NAME_AB_LOADED_AT), DSL.inline(null as String?)) - .sql) + override fun clearLoadedAt(streamId: StreamId): Sql { + return of( + DSL.update(DSL.table(DSL.name(streamId.rawNamespace, streamId.rawName))) + .set( + DSL.field(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT), + DSL.inline(null as String?) + ) + .sql + ) } @VisibleForTesting - fun selectFromRawTable(schemaName: String?, - tableName: String?, - columns: LinkedHashMap?, - metaColumns: Map?>, - condition: Condition?, - useExpensiveSaferCasting: Boolean): SelectConditionStep { + fun selectFromRawTable( + schemaName: String?, + tableName: String?, + columns: LinkedHashMap, + metaColumns: Map?>, + condition: Condition?, + useExpensiveSaferCasting: Boolean + ): SelectConditionStep { val dsl = dslContext - return dsl - .select(buildRawTableSelectFields(columns, metaColumns, useExpensiveSaferCasting)) - .select(buildAirbyteMetaColumn(columns)) - .from(DSL.table(DSL.quotedName(schemaName, tableName))) - .where(condition) + return dsl.select(buildRawTableSelectFields(columns, metaColumns, useExpensiveSaferCasting)) + .select(buildAirbyteMetaColumn(columns)) + .from(DSL.table(DSL.quotedName(schemaName, tableName))) + .where(condition) } @VisibleForTesting - fun insertIntoFinalTable(schemaName: String?, - tableName: String?, - columns: LinkedHashMap, - metaFields: Map?>): InsertValuesStepN { + fun insertIntoFinalTable( + schemaName: String?, + tableName: String?, + columns: LinkedHashMap, + metaFields: Map?> + ): InsertValuesStepN { val dsl = dslContext - return dsl - .insertInto(DSL.table(DSL.quotedName(schemaName, tableName))) - .columns(buildFinalTableFields(columns, metaFields)) + return dsl.insertInto(DSL.table(DSL.quotedName(schemaName, tableName))) + .columns(buildFinalTableFields(columns, metaFields)) } - private fun insertAndDeleteTransaction(streamConfig: StreamConfig?, - finalSuffix: String?, - minRawTimestamp: Optional, - useExpensiveSaferCasting: Boolean): Sql { - val finalSchema = streamConfig!!.id.finalNamespace - val finalTable = streamConfig.id.finalName + (finalSuffix?.lowercase(Locale.getDefault()) ?: "") + private fun insertAndDeleteTransaction( + streamConfig: StreamConfig, + finalSuffix: String?, + minRawTimestamp: Optional, + useExpensiveSaferCasting: Boolean + ): Sql { + val finalSchema = streamConfig.id.finalNamespace + val finalTable = + streamConfig.id.finalName + (finalSuffix?.lowercase(Locale.getDefault()) ?: "") val rawSchema = streamConfig.id.rawNamespace val rawTable = streamConfig.id.rawName - // Poor person's guarantee of ordering of fields by using same source of ordered list of columns to + // Poor person's guarantee of ordering of fields by using same source of ordered list of + // columns to // generate fields. 
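            // As a rough, illustrative sketch (actual CTE and column names come from the
            // TYPING_CTE_ALIAS / NUMBERED_ROWS_CTE_ALIAS / ROW_NUMBER_COLUMN_NAME constants;
            // quoting and casts are dialect-specific), the append-dedup branch below
            // assembles SQL of this shape:
            //
            //   WITH typing_cte AS (
            //     SELECT <casted data columns>, <airbyte meta columns>
            //     FROM raw_schema.raw_table
            //     WHERE "_airbyte_loaded_at" IS NULL
            //       [AND "_airbyte_extracted_at" > <minRawTimestamp>]
            //   ),
            //   numbered_rows AS (
            //     SELECT *, <getRowNumber(primaryKey, cursor)> AS row_number
            //     FROM typing_cte
            //   )
            //   INSERT INTO final_schema.final_table (<final table fields>)
            //   SELECT <final table fields> FROM numbered_rows WHERE row_number = 1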
- val rawTableRowsWithCast = DSL.name(TYPING_CTE_ALIAS).`as`( - selectFromRawTable(rawSchema, rawTable, streamConfig.columns, + val rawTableRowsWithCast = + DSL.name(TYPING_CTE_ALIAS) + .`as`( + selectFromRawTable( + rawSchema, + rawTable, + streamConfig.columns!!, getFinalTableMetaColumns(false), - rawTableCondition(streamConfig.destinationSyncMode, - streamConfig.columns.containsKey(cdcDeletedAtColumn), - minRawTimestamp), - useExpensiveSaferCasting)) - val finalTableFields = buildFinalTableFields(streamConfig.columns, getFinalTableMetaColumns(true)) - val rowNumber = getRowNumber(streamConfig.primaryKey, streamConfig.cursor) - val filteredRows = DSL.name(NUMBERED_ROWS_CTE_ALIAS).`as`( - DSL.select(DSL.asterisk(), rowNumber).from(rawTableRowsWithCast)) + rawTableCondition( + streamConfig.destinationSyncMode!!, + streamConfig.columns!!.containsKey(cdcDeletedAtColumn), + minRawTimestamp + ), + useExpensiveSaferCasting + ) + ) + val finalTableFields = + buildFinalTableFields(streamConfig.columns!!, getFinalTableMetaColumns(true)) + val rowNumber = getRowNumber(streamConfig.primaryKey, streamConfig.cursor!!) + val filteredRows = + DSL.name(NUMBERED_ROWS_CTE_ALIAS) + .`as`(DSL.select(DSL.asterisk(), rowNumber).from(rawTableRowsWithCast)) // Used for append-dedupe mode. val insertStmtWithDedupe = - insertIntoFinalTable(finalSchema, finalTable, streamConfig.columns, getFinalTableMetaColumns(true)) - .select(DSL.with(rawTableRowsWithCast) - .with(filteredRows) - .select(finalTableFields) - .from(filteredRows) - .where(DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME), Int::class.java).eq(1)) // Can refer by CTE.field but no use since we don't strongly type - // them. - ) - .getSQL(ParamType.INLINED) + insertIntoFinalTable( + finalSchema, + finalTable, + streamConfig.columns!!, + getFinalTableMetaColumns(true) + ) + .select( + DSL.with(rawTableRowsWithCast) + .with(filteredRows) + .select(finalTableFields) + .from(filteredRows) + .where( + DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME), Int::class.java).eq(1) + ) // Can refer by CTE.field but no use since we don't strongly type + // them. + ) + .getSQL(ParamType.INLINED) // Used for append and overwrite modes. val insertStmt = - insertIntoFinalTable(finalSchema, finalTable, streamConfig.columns, getFinalTableMetaColumns(true)) - .select(DSL.with(rawTableRowsWithCast) - .select(finalTableFields) - .from(rawTableRowsWithCast)) - .getSQL(ParamType.INLINED) - val deleteStmt = deleteFromFinalTable(finalSchema, finalTable, streamConfig.primaryKey, streamConfig.cursor) + insertIntoFinalTable( + finalSchema, + finalTable, + streamConfig.columns!!, + getFinalTableMetaColumns(true) + ) + .select( + DSL.with(rawTableRowsWithCast) + .select(finalTableFields) + .from(rawTableRowsWithCast) + ) + .getSQL(ParamType.INLINED) + val deleteStmt = + deleteFromFinalTable( + finalSchema, + finalTable, + streamConfig.primaryKey!!, + streamConfig.cursor!! 
+ ) val deleteCdcDeletesStmt = - if (streamConfig.columns.containsKey(cdcDeletedAtColumn)) deleteFromFinalTableCdcDeletes(finalSchema, finalTable) else "" + if (streamConfig.columns!!.containsKey(cdcDeletedAtColumn)) + deleteFromFinalTableCdcDeletes(finalSchema, finalTable) + else "" val checkpointStmt = checkpointRawTable(rawSchema, rawTable, minRawTimestamp) if (streamConfig.destinationSyncMode != DestinationSyncMode.APPEND_DEDUP) { - return transactionally( - insertStmt, - checkpointStmt) + return transactionally(insertStmt, checkpointStmt) } // For append-dedupe return transactionally( - insertStmtWithDedupe, - deleteStmt, - deleteCdcDeletesStmt, - checkpointStmt) + insertStmtWithDedupe, + deleteStmt, + deleteCdcDeletesStmt, + checkpointStmt + ) } - private fun mergeTransaction(streamConfig: StreamConfig, - finalSuffix: String, - minRawTimestamp: Optional, - useExpensiveSaferCasting: Boolean): String { + private fun mergeTransaction( + streamConfig: StreamConfig, + finalSuffix: String, + minRawTimestamp: Optional, + useExpensiveSaferCasting: Boolean + ): String { throw UnsupportedOperationException("Not implemented yet") } @@ -358,17 +506,22 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio return createSchemaSql.sql } - protected fun createTableSql(namespace: String?, tableName: String?, columns: LinkedHashMap): String { + protected fun createTableSql( + namespace: String?, + tableName: String?, + columns: LinkedHashMap + ): String { val dsl = dslContext - val createTableSql = dsl - .createTable(DSL.quotedName(namespace, tableName)) + val createTableSql = + dsl.createTable(DSL.quotedName(namespace, tableName)) .columns(buildFinalTableFields(columns, getFinalTableMetaColumns(true))) return createTableSql.sql } /** - * Subclasses may override this method to add additional indexes after their CREATE TABLE statement. - * This is useful if the destination's CREATE TABLE statement does not accept an index definition. + * Subclasses may override this method to add additional indexes after their CREATE TABLE + * statement. This is useful if the destination's CREATE TABLE statement does not accept an + * index definition. */ protected fun createIndexSql(stream: StreamConfig?, suffix: String?): List { return emptyList() @@ -386,61 +539,89 @@ abstract class JdbcSqlGenerator(protected val namingTransformer: NamingConventio return commitTransaction() + ";" } - private fun deleteFromFinalTable(schemaName: String?, - tableName: String, - primaryKeys: List, - cursor: Optional): String { + private fun deleteFromFinalTable( + schemaName: String?, + tableName: String, + primaryKeys: List, + cursor: Optional + ): String { val dsl = dslContext // Unknown type doesn't play well with where .. in (select..) 
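        // Shape of the statement built below (illustrative only; identifier quoting and
        // the row-number expression are dialect-specific): delete every row whose raw id
        // is not the rank-1 row under the primary-key/cursor ordering:
        //
        //   DELETE FROM schema.table
        //   WHERE "_airbyte_raw_id" IN (
        //     SELECT "_airbyte_raw_id"
        //     FROM (
        //       SELECT "_airbyte_raw_id", <getRowNumber(primaryKeys, cursor)>
        //       FROM schema.table
        //     ) AS airbyte_ids
        //     WHERE row_number <> 1
        //   )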
- val airbyteRawId: Field = DSL.field(DSL.quotedName(COLUMN_NAME_AB_RAW_ID)) + val airbyteRawId: Field = + DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID)) val rowNumber = getRowNumber(primaryKeys, cursor) return dsl.deleteFrom(DSL.table(DSL.quotedName(schemaName, tableName))) - .where(airbyteRawId.`in`( - DSL.select(airbyteRawId) - .from(DSL.select(airbyteRawId, rowNumber) - .from(DSL.table(DSL.quotedName(schemaName, tableName))).asTable("airbyte_ids")) - .where(DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME)).ne(1)))) - .getSQL(ParamType.INLINED) + .where( + airbyteRawId.`in`( + DSL.select(airbyteRawId) + .from( + DSL.select(airbyteRawId, rowNumber) + .from(DSL.table(DSL.quotedName(schemaName, tableName))) + .asTable("airbyte_ids") + ) + .where(DSL.field(DSL.name(ROW_NUMBER_COLUMN_NAME)).ne(1)) + ) + ) + .getSQL(ParamType.INLINED) } private fun deleteFromFinalTableCdcDeletes(schema: String?, tableName: String): String { val dsl = dslContext return dsl.deleteFrom(DSL.table(DSL.quotedName(schema, tableName))) - .where(DSL.field(DSL.quotedName(cdcDeletedAtColumn.name)).isNotNull()) - .getSQL(ParamType.INLINED) + .where(DSL.field(DSL.quotedName(cdcDeletedAtColumn.name)).isNotNull()) + .getSQL(ParamType.INLINED) } - private fun checkpointRawTable(schemaName: String?, tableName: String?, minRawTimestamp: Optional): String { + private fun checkpointRawTable( + schemaName: String?, + tableName: String?, + minRawTimestamp: Optional + ): String { val dsl = dslContext var extractedAtCondition = DSL.noCondition() if (minRawTimestamp.isPresent) { - extractedAtCondition = extractedAtCondition.and(DSL.field(DSL.name(COLUMN_NAME_AB_EXTRACTED_AT)).gt(minRawTimestamp.get().toString())) + extractedAtCondition = + extractedAtCondition.and( + DSL.field(DSL.name(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)) + .gt(minRawTimestamp.get().toString()) + ) } return dsl.update(DSL.table(DSL.quotedName(schemaName, tableName))) - .set(DSL.field(DSL.quotedName(COLUMN_NAME_AB_LOADED_AT)), currentTimestamp()) - .where(DSL.field(DSL.quotedName(COLUMN_NAME_AB_LOADED_AT)).isNull()).and(extractedAtCondition) - .getSQL(ParamType.INLINED) + .set( + DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)), + currentTimestamp() + ) + .where(DSL.field(DSL.quotedName(JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT)).isNull()) + .and(extractedAtCondition) + .getSQL(ParamType.INLINED) } protected fun castedField( - field: Field<*>?, - type: AirbyteType, - alias: String?, - useExpensiveSaferCasting: Boolean): Field<*> { + field: Field<*>?, + type: AirbyteType, + alias: String?, + useExpensiveSaferCasting: Boolean + ): Field<*> { if (type is AirbyteProtocolType) { return castedField(field, type, useExpensiveSaferCasting).`as`(DSL.quotedName(alias)) } // Redshift SUPER can silently cast an array type to struct and vice versa. 
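        // For example (assuming a dialect whose toDialectType maps STRING to VARCHAR),
        // castedField(DSL.field("col"), AirbyteProtocolType.STRING, "alias", false)
        // renders roughly as CAST(col AS VARCHAR) AS "alias"; a Union type first
        // collapses to its widest member via chooseType() and is then cast as that member.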
return when (type.typeName) { - Struct.TYPE, UnsupportedOneOf.TYPE -> DSL.cast(field, structType).`as`(DSL.quotedName(alias)) + Struct.TYPE, + UnsupportedOneOf.TYPE -> DSL.cast(field, structType).`as`(DSL.quotedName(alias)) Array.TYPE -> DSL.cast(field, arrayType).`as`(DSL.quotedName(alias)) - Union.TYPE -> castedField(field, (type as Union).chooseType(), alias, useExpensiveSaferCasting) + Union.TYPE -> + castedField(field, (type as Union).chooseType(), alias, useExpensiveSaferCasting) else -> throw IllegalArgumentException("Unsupported AirbyteType: $type") } } - protected fun castedField(field: Field<*>?, type: AirbyteProtocolType?, useExpensiveSaferCasting: Boolean): Field<*> { + protected fun castedField( + field: Field<*>?, + type: AirbyteProtocolType, + useExpensiveSaferCasting: Boolean + ): Field<*> { return DSL.cast(field, toDialectType(type)) } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt index 6f5ccca8069f..d635050fe271 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcV1V2Migrator.kt @@ -10,51 +10,70 @@ import io.airbyte.commons.exceptions.SQLRuntimeException import io.airbyte.integrations.base.destination.typing_deduping.BaseDestinationV1V2Migrator import io.airbyte.integrations.base.destination.typing_deduping.NamespacedTableName import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig -import lombok.SneakyThrows import java.sql.DatabaseMetaData import java.sql.SQLException import java.util.* +import lombok.SneakyThrows /** * Largely based on * [io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator]. */ -class JdbcV1V2Migrator(private val namingConventionTransformer: NamingConventionTransformer, private val database: JdbcDatabase, private val databaseName: String) : BaseDestinationV1V2Migrator() { +class JdbcV1V2Migrator( + private val namingConventionTransformer: NamingConventionTransformer, + private val database: JdbcDatabase, + private val databaseName: String +) : BaseDestinationV1V2Migrator() { @SneakyThrows override fun doesAirbyteInternalNamespaceExist(streamConfig: StreamConfig?): Boolean { - val retrievedSchema = database.executeMetadataQuery { dbMetadata: DatabaseMetaData? -> - try { - dbMetadata!!.getSchemas(databaseName, streamConfig!!.id.rawNamespace).use { columns -> - var schema: String? = "" - while (columns.next()) { - // Catalog can be null, so don't do anything with it. - // columns.getString("TABLE_CATALOG"); - schema = columns.getString("TABLE_SCHEM") + val retrievedSchema = + database.executeMetadataQuery { dbMetadata: DatabaseMetaData? -> + try { + dbMetadata!!.getSchemas(databaseName, streamConfig!!.id.rawNamespace).use { + columns -> + var schema = "" + while (columns.next()) { + // Catalog can be null, so don't do anything with it. 
+ // columns.getString("TABLE_CATALOG"); + schema = columns.getString("TABLE_SCHEM") + } + return@executeMetadataQuery schema } - return@executeMetadataQuery schema + } catch (e: SQLException) { + throw SQLRuntimeException(e) } - } catch (e: SQLException) { - throw SQLRuntimeException(e) } - } return !retrievedSchema.isEmpty() } - override fun schemaMatchesExpectation(existingTable: TableDefinition, columns: Collection?): Boolean { - return existingTable.columns.keys.containsAll(columns!!) + override fun schemaMatchesExpectation( + existingTable: TableDefinition, + columns: Collection + ): Boolean { + return existingTable.columns.keys.containsAll(columns) } @SneakyThrows @Throws(Exception::class) - override fun getTableIfExists(namespace: String?, tableName: String?): Optional { - return JdbcDestinationHandler.Companion.findExistingTable(database, databaseName, namespace, tableName) + override fun getTableIfExists( + namespace: String?, + tableName: String? + ): Optional { + return JdbcDestinationHandler.Companion.findExistingTable( + database, + databaseName, + namespace, + tableName + ) } - override fun convertToV1RawName(streamConfig: StreamConfig?): NamespacedTableName { - @Suppress("deprecation") val tableName = namingConventionTransformer.getRawTableName(streamConfig!!.id.originalName!!) + override fun convertToV1RawName(streamConfig: StreamConfig): NamespacedTableName { + @Suppress("deprecation") + val tableName = namingConventionTransformer.getRawTableName(streamConfig.id.originalName!!) return NamespacedTableName( - namingConventionTransformer.getIdentifier(streamConfig.id.originalNamespace!!), - tableName) + namingConventionTransformer.getIdentifier(streamConfig.id.originalNamespace!!), + tableName + ) } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt index ae214c8bbdf8..84b4dc6cb17b 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/RawOnlySqlGenerator.kt @@ -19,28 +19,32 @@ import org.jooq.SQLDialect * TyperDeduper classes. This implementation appeases that requirement but does not implement any * "final" table operations. */ -class RawOnlySqlGenerator(private val namingTransformer: NamingConventionTransformer) : +class RawOnlySqlGenerator(namingTransformer: NamingConventionTransformer) : JdbcSqlGenerator(namingTransformer) { - override fun getStructType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val structType: DataType<*> + get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getArrayType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val arrayType: DataType<*>? + get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getWidestType(): DataType<*>? { - throw NotImplementedError("This Destination does not support final tables") - } + override val widestType: DataType<*>? 
+ get() { + throw NotImplementedError("This Destination does not support final tables") + } - override fun getDialect(): SQLDialect? { - throw NotImplementedError("This Destination does not support final tables") - } + override val dialect: SQLDialect? + get() { + throw NotImplementedError("This Destination does not support final tables") + } override fun extractRawDataFields( columns: LinkedHashMap, useExpensiveSaferCasting: Boolean, - ): List>? { + ): MutableList> { throw NotImplementedError("This Destination does not support final tables") } @@ -53,9 +57,9 @@ class RawOnlySqlGenerator(private val namingTransformer: NamingConventionTransfo } override fun getRowNumber( - primaryKey: List, + primaryKey: List?, cursorField: Optional, - ): Field? { + ): Field { throw NotImplementedError("This Destination does not support final tables") } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt index 46982441cc24..e516d50eef01 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.kt @@ -13,61 +13,87 @@ import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair import io.airbyte.protocol.models.v0.DestinationSyncMode import io.airbyte.protocol.models.v0.StreamDescriptor -import lombok.extern.slf4j.Slf4j +import io.github.oshai.kotlinlogging.KotlinLogging import java.util.* -/** - * Functions and logic common to all flushing strategies. - */ -@Slf4j +private val log = KotlinLogging.logger {} +/** Functions and logic common to all flushing strategies. */ object GeneralStagingFunctions { // using a random string here as a placeholder for the moment. // This would avoid mixing data in the staging area between different syncs (especially if they // manipulate streams with similar names) - // if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to + // if we replaced the random connection id by the actual connection_id, we'd gain the + // opportunity to // leverage data that was uploaded to stage - // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. + // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) + // instead. // This would also allow other programs/scripts // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. 
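    // As a purely hypothetical illustration of how this placeholder flows into the
    // staging layout: getStagingPath(RANDOM_CONNECTION_ID, schema, stream, table,
    // writeDatetime) might produce a prefix like
    //   <schema>/<stream>/<yyyy>/<MM>/<dd>/<RANDOM_CONNECTION_ID>/
    // so retried or concurrent syncs never collide on the same objects; the concrete
    // layout is destination-specific.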
val RANDOM_CONNECTION_ID: UUID = UUID.randomUUID() - fun onStartFunction(database: JdbcDatabase?, - stagingOperations: StagingOperations, - writeConfigs: List, - typerDeduper: TyperDeduper): OnStartFunction { + fun onStartFunction( + database: JdbcDatabase, + stagingOperations: StagingOperations, + writeConfigs: List, + typerDeduper: TyperDeduper + ): OnStartFunction { return OnStartFunction { - GeneralStagingFunctions.log.info("Preparing raw tables in destination started for {} streams", writeConfigs.size) + log.info( + "Preparing raw tables in destination started for {} streams", + writeConfigs.size + ) typerDeduper.prepareSchemasAndRunMigrations() // Create raw tables - val queryList: MutableList = ArrayList() + val queryList: MutableList = ArrayList() for (writeConfig in writeConfigs) { val schema = writeConfig.outputSchemaName val stream = writeConfig.streamName val dstTableName = writeConfig.outputTableName val stageName = stagingOperations.getStageName(schema, dstTableName) val stagingPath = - stagingOperations.getStagingPath(SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID, schema, stream, writeConfig.outputTableName, - writeConfig.writeDatetime) + stagingOperations.getStagingPath( + SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID, + schema, + stream, + writeConfig.outputTableName, + writeConfig.writeDatetime + ) - GeneralStagingFunctions.log.info("Preparing staging area in destination started for schema {} stream {}: target table: {}, stage: {}", - schema, stream, dstTableName, stagingPath) + log.info( + "Preparing staging area in destination started for schema {} stream {}: target table: {}, stage: {}", + schema, + stream, + dstTableName, + stagingPath + ) stagingOperations.createSchemaIfNotExists(database, schema) stagingOperations.createTableIfNotExists(database, schema, dstTableName) stagingOperations.createStageIfNotExists(database, stageName) when (writeConfig.syncMode) { - DestinationSyncMode.OVERWRITE -> queryList.add(stagingOperations.truncateTableQuery(database, schema, dstTableName)) - DestinationSyncMode.APPEND, DestinationSyncMode.APPEND_DEDUP -> {} - else -> throw IllegalStateException("Unrecognized sync mode: " + writeConfig.syncMode) + DestinationSyncMode.OVERWRITE -> + queryList.add( + stagingOperations.truncateTableQuery(database, schema, dstTableName) + ) + DestinationSyncMode.APPEND, + DestinationSyncMode.APPEND_DEDUP -> {} + else -> + throw IllegalStateException( + "Unrecognized sync mode: " + writeConfig.syncMode + ) } - GeneralStagingFunctions.log.info("Preparing staging area in destination completed for schema {} stream {}", schema, stream) + log.info( + "Preparing staging area in destination completed for schema {} stream {}", + schema, + stream + ) } typerDeduper.prepareFinalTables() - GeneralStagingFunctions.log.info("Executing finalization of tables.") + log.info("Executing finalization of tables.") stagingOperations.executeTransaction(database, queryList) } } @@ -77,23 +103,32 @@ object GeneralStagingFunctions { * upload was unsuccessful */ @Throws(Exception::class) - fun copyIntoTableFromStage(database: JdbcDatabase?, - stageName: String?, - stagingPath: String?, - stagedFiles: List?, - tableName: String?, - schemaName: String?, - stagingOperations: StagingOperations, - streamNamespace: String?, - streamName: String?, - typerDeduperValve: TypeAndDedupeOperationValve, - typerDeduper: TyperDeduper) { + fun copyIntoTableFromStage( + database: JdbcDatabase?, + stageName: String?, + stagingPath: String?, + stagedFiles: List?, + 
 tableName: String?, + schemaName: String?, + stagingOperations: StagingOperations, + streamNamespace: String?, + streamName: String?, + typerDeduperValve: TypeAndDedupeOperationValve, + typerDeduper: TyperDeduper + ) { try { - val rawTableInsertLock = typerDeduper.getRawTableInsertLock(streamNamespace!!, streamName!!) + val rawTableInsertLock = + typerDeduper.getRawTableInsertLock(streamNamespace!!, streamName!!) rawTableInsertLock.lock() try { - stagingOperations.copyIntoTableFromStage(database, stageName, stagingPath, stagedFiles, - tableName, schemaName) + stagingOperations.copyIntoTableFromStage( + database, + stageName, + stagingPath, + stagedFiles, + tableName, + schemaName + ) } finally { rawTableInsertLock.unlock() } @@ -113,41 +148,55 @@ object GeneralStagingFunctions { * Tear-down process; will attempt to clean out any staging area * * @param database database used for syncing - * @param stagingOperations collection of SQL queries necessary for writing data into a staging area + * @param stagingOperations collection of SQL queries necessary for writing data into a staging + * area * @param writeConfigs configuration settings for all destination connectors needed to write * @param purgeStagingData drop staging area if true, keep otherwise * @return an [OnCloseFunction] that types and dedupes, optionally purges staging, and commits final tables */ - fun onCloseFunction(database: JdbcDatabase?, - stagingOperations: StagingOperations, - writeConfigs: List, - purgeStagingData: Boolean, - typerDeduper: TyperDeduper): OnCloseFunction { - return OnCloseFunction { hasFailed: Boolean?, streamSyncSummaries: Map? -> - // After moving data from staging area to the target table (airybte_raw) clean up the staging + fun onCloseFunction( + database: JdbcDatabase?, + stagingOperations: StagingOperations, + writeConfigs: List, + purgeStagingData: Boolean, + typerDeduper: TyperDeduper + ): OnCloseFunction { + return OnCloseFunction { + hasFailed: Boolean, + streamSyncSummaries: Map -> + // After moving data from staging area to the target table (airbyte_raw) clean up the + // staging // area (if user configured) - GeneralStagingFunctions.log.info("Cleaning up destination started for {} streams", writeConfigs.size) + log.info("Cleaning up destination started for {} streams", writeConfigs.size) typerDeduper.typeAndDedupe(streamSyncSummaries) for (writeConfig in writeConfigs) { val schemaName = writeConfig.outputSchemaName if (purgeStagingData) { - val stageName = stagingOperations.getStageName(schemaName, writeConfig.outputTableName) - val stagePath = stagingOperations.getStagingPath( + val stageName = + stagingOperations.getStageName(schemaName, writeConfig.outputTableName) + val stagePath = + stagingOperations.getStagingPath( RANDOM_CONNECTION_ID, schemaName, writeConfig.streamName, writeConfig.outputTableName, - writeConfig.writeDatetime) - GeneralStagingFunctions.log.info("Cleaning stage in destination started for stream {}. schema {}, stage: {}", writeConfig.streamName, schemaName, - stagePath) - // TODO: This is another weird manifestation of Redshift vs Snowflake using either or variables from + writeConfig.writeDatetime + ) + log.info( + "Cleaning stage in destination started for stream {}, schema {}, stage: {}", + writeConfig.streamName, + schemaName, + stagePath + ) + // TODO: This is another weird manifestation of Redshift vs Snowflake using + // either/or variables from // stageName/StagingPath.
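                    // (Illustrative, destination-specific behavior: Snowflake can drop a named
                    // stage outright, e.g. "DROP STAGE IF EXISTS <stageName>", while S3-backed
                    // staging instead deletes the objects under stagingPath -- which is why
                    // dropStageIfExists receives both values.)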
 stagingOperations.dropStageIfExists(database, stageName, stagePath) } } typerDeduper.commitFinalTables() typerDeduper.cleanup() - GeneralStagingFunctions.log.info("Cleaning up destination completed.") + log.info("Cleaning up destination completed.") } } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt index 832796d09090..335cc1fa004d 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialFlush.kt @@ -14,84 +14,125 @@ import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOpe import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog -import lombok.extern.slf4j.Slf4j -import org.apache.commons.io.FileUtils +import io.github.oshai.kotlinlogging.KotlinLogging import java.util.stream.Collectors +import org.apache.commons.io.FileUtils + +private val log = KotlinLogging.logger {} /** * Serial flushing logic. Though simpler, this causes unnecessary backpressure and slows down the * entire pipeline. * - * - * Note: This class should be re-written so that is implements the [FlushBufferFunction] - * interface, instead of return an anonymous function implementing this interface for clarity. As of - * this writing, we avoid doing so to simplify the migration to async flushing. + * Note: This class should be re-written so that it implements the [FlushBufferFunction] interface, + * instead of returning an anonymous function implementing this interface, for clarity. As of this + * writing, we avoid doing so to simplify the migration to async flushing. */ -@Slf4j object SerialFlush { /** - * Logic handling how destinations with staging areas (aka bucket storages) will flush their buffer + * Logic handling how destinations with staging areas (aka bucket storages) will flush their + * buffer * * @param database database used for syncing - * @param stagingOperations collection of SQL queries necessary for writing data into a staging area + * @param stagingOperations collection of SQL queries necessary for writing data into a staging + * area * @param writeConfigs configuration settings for all destination connectors needed to write * @param catalog collection of configured streams (e.g.
API endpoints or database tables) * @return */ @VisibleForTesting fun function( - database: JdbcDatabase?, - stagingOperations: StagingOperations, - writeConfigs: List, - catalog: ConfiguredAirbyteCatalog, - typerDeduperValve: TypeAndDedupeOperationValve, - typerDeduper: TyperDeduper): FlushBufferFunction { - // TODO: (ryankfu) move this block of code that executes before the lambda to #onStartFunction - val conflictingStreams: MutableSet = HashSet() + database: JdbcDatabase?, + stagingOperations: StagingOperations, + writeConfigs: List, + catalog: ConfiguredAirbyteCatalog, + typerDeduperValve: TypeAndDedupeOperationValve, + typerDeduper: TyperDeduper + ): FlushBufferFunction { + // TODO: (ryankfu) move this block of code that executes before the lambda to + // #onStartFunction + val conflictingStreams: MutableSet = HashSet() val pairToWriteConfig: MutableMap = HashMap() for (config in writeConfigs) { val streamIdentifier = toNameNamespacePair(config) if (pairToWriteConfig.containsKey(streamIdentifier)) { conflictingStreams.add(config) - val existingConfig = pairToWriteConfig[streamIdentifier] - // The first conflicting stream won't have any problems, so we need to explicitly add it here. + val existingConfig = pairToWriteConfig.getValue(streamIdentifier) + // The first conflicting stream won't have any problems, so we need to explicitly + // add it here. conflictingStreams.add(existingConfig) } else { pairToWriteConfig[streamIdentifier] = config } } if (!conflictingStreams.isEmpty()) { - val message = String.format( + val message = + String.format( "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: %s", - conflictingStreams.stream().map { config: WriteConfig? -> config.getNamespace() + "." + config.getStreamName() }.collect(Collectors.joining(", "))) + conflictingStreams + .stream() + .map { config: WriteConfig -> config.namespace + "." + config.streamName } + .collect(Collectors.joining(", ")) + ) throw ConfigErrorException(message) } - return FlushBufferFunction { pair: AirbyteStreamNameNamespacePair, writer: SerializableBuffer -> - SerialFlush.log.info("Flushing buffer for stream {} ({}) to staging", pair.name, FileUtils.byteCountToDisplaySize(writer.byteCount)) - require(pairToWriteConfig.containsKey(pair)) { String.format("Message contained record from a stream that was not in the catalog. \ncatalog: %s", Jsons.serialize(catalog)) } + return FlushBufferFunction { + pair: AirbyteStreamNameNamespacePair, + writer: SerializableBuffer -> + log.info( + "Flushing buffer for stream {} ({}) to staging", + pair.name, + FileUtils.byteCountToDisplaySize(writer.byteCount) + ) + require(pairToWriteConfig.containsKey(pair)) { + String.format( + "Message contained record from a stream that was not in the catalog. 
\ncatalog: %s", + Jsons.serialize(catalog) + ) + } - val writeConfig = pairToWriteConfig[pair] - val schemaName = writeConfig.getOutputSchemaName() - val stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName()) + val writeConfig = pairToWriteConfig.getValue(pair) + val schemaName = writeConfig.outputSchemaName + val stageName = stagingOperations.getStageName(schemaName, writeConfig.outputTableName) val stagingPath = - stagingOperations.getStagingPath( - SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(), - writeConfig.getOutputTableName(), writeConfig.getWriteDatetime()) + stagingOperations.getStagingPath( + SerialStagingConsumerFactory.Companion.RANDOM_CONNECTION_ID, + schemaName, + writeConfig.streamName, + writeConfig.outputTableName, + writeConfig.writeDatetime + ) try { writer.use { writer.flush() - val stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath) - GeneralStagingFunctions.copyIntoTableFromStage(database, stageName, stagingPath, java.util.List.of(stagedFile), writeConfig.getOutputTableName(), + val stagedFile = + stagingOperations.uploadRecordsToStage( + database, + writer, schemaName, - stagingOperations, - writeConfig.getNamespace(), - writeConfig.getStreamName(), - typerDeduperValve, - typerDeduper) + stageName, + stagingPath + ) + GeneralStagingFunctions.copyIntoTableFromStage( + database, + stageName, + stagingPath, + java.util.List.of(stagedFile), + writeConfig.outputTableName, + schemaName, + stagingOperations, + writeConfig.namespace, + writeConfig.streamName, + typerDeduperValve, + typerDeduper + ) } } catch (e: Exception) { - SerialFlush.log.error("Failed to flush and commit buffer data into destination's raw table", e) - throw RuntimeException("Failed to upload buffer to stage and commit to destination", e) + log.error("Failed to flush and commit buffer data into destination's raw table", e) + throw RuntimeException( + "Failed to upload buffer to stage and commit to destination", + e + ) } } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt index dc1d2f3dccff..ac19faf27523 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactory.kt @@ -19,64 +19,98 @@ import io.airbyte.protocol.models.v0.AirbyteMessage import io.airbyte.protocol.models.v0.AirbyteStream import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.time.Instant import java.util.* import java.util.function.Consumer import java.util.function.Function import java.util.stream.Collectors +import org.slf4j.Logger +import org.slf4j.LoggerFactory /** * Uses both Factory and Consumer design pattern to create a single point of creation for consuming * [AirbyteMessage] for processing */ open class SerialStagingConsumerFactory { - fun create(outputRecordCollector: Consumer?, - database: JdbcDatabase?, - stagingOperations: StagingOperations, - namingResolver: 
NamingConventionTransformer, - onCreateBuffer: BufferCreateFunction?, - config: JsonNode, - catalog: ConfiguredAirbyteCatalog, - purgeStagingData: Boolean, - typerDeduperValve: TypeAndDedupeOperationValve, - typerDeduper: TyperDeduper, - parsedCatalog: ParsedCatalog, - defaultNamespace: String?, - useDestinationsV2Columns: Boolean): AirbyteMessageConsumer { - val writeConfigs = createWriteConfigs(namingResolver, config, catalog, parsedCatalog, useDestinationsV2Columns) + fun create( + outputRecordCollector: Consumer, + database: JdbcDatabase, + stagingOperations: StagingOperations, + namingResolver: NamingConventionTransformer, + onCreateBuffer: BufferCreateFunction, + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + purgeStagingData: Boolean, + typerDeduperValve: TypeAndDedupeOperationValve, + typerDeduper: TyperDeduper, + parsedCatalog: ParsedCatalog, + defaultNamespace: String?, + useDestinationsV2Columns: Boolean + ): AirbyteMessageConsumer { + val writeConfigs = + createWriteConfigs( + namingResolver, + config, + catalog, + parsedCatalog, + useDestinationsV2Columns + ) return BufferedStreamConsumer( - outputRecordCollector, - GeneralStagingFunctions.onStartFunction(database, stagingOperations, writeConfigs, typerDeduper), - SerializedBufferingStrategy( - onCreateBuffer!!, - catalog, - SerialFlush.function(database, stagingOperations, writeConfigs, catalog, typerDeduperValve, typerDeduper)), - GeneralStagingFunctions.onCloseFunction(database, stagingOperations, writeConfigs, purgeStagingData, typerDeduper), + outputRecordCollector, + GeneralStagingFunctions.onStartFunction( + database, + stagingOperations, + writeConfigs, + typerDeduper + ), + SerializedBufferingStrategy( + onCreateBuffer, catalog, - { data: JsonNode? -> stagingOperations.isValidData(data) }, - defaultNamespace) + SerialFlush.function( + database, + stagingOperations, + writeConfigs, + catalog, + typerDeduperValve, + typerDeduper + ) + ), + GeneralStagingFunctions.onCloseFunction( + database, + stagingOperations, + writeConfigs, + purgeStagingData, + typerDeduper + ), + catalog, + { data: JsonNode? -> stagingOperations.isValidData(data) }, + defaultNamespace + ) } companion object { - private val LOGGER: Logger = LoggerFactory.getLogger(SerialStagingConsumerFactory::class.java) + private val LOGGER: Logger = + LoggerFactory.getLogger(SerialStagingConsumerFactory::class.java) // using a random string here as a placeholder for the moment. - // This would avoid mixing data in the staging area between different syncs (especially if they + // This would avoid mixing data in the staging area between different syncs (especially if + // they // manipulate streams with similar names) - // if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to + // if we replaced the random connection id by the actual connection_id, we'd gain the + // opportunity to // leverage data that was uploaded to stage - // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. + // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) + // instead. // This would also allow other programs/scripts - // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. + // to load (or reload backups?) in the connection's staging area to be loaded at the next + // sync. 
private val SYNC_DATETIME: Instant = Instant.now() val RANDOM_CONNECTION_ID: UUID = UUID.randomUUID() /** - * Creates a list of all [WriteConfig] for each stream within a - * [ConfiguredAirbyteCatalog]. Each write config represents the configuration settings for - * writing to a destination connector + * Creates a list of all [WriteConfig] for each stream within a [ConfiguredAirbyteCatalog]. + * Each write config represents the configuration settings for writing to a destination + * connector * * @param namingResolver [NamingConventionTransformer] used to transform names that are * acceptable by each destination connector @@ -85,48 +119,68 @@ open class SerialStagingConsumerFactory { * [ConfiguredAirbyteStream] * @return list of all write configs for each stream in a [ConfiguredAirbyteCatalog] */ - private fun createWriteConfigs(namingResolver: NamingConventionTransformer, - config: JsonNode, - catalog: ConfiguredAirbyteCatalog, - parsedCatalog: ParsedCatalog, - useDestinationsV2Columns: Boolean): List { - return catalog.streams.stream().map(toWriteConfig(namingResolver, config, parsedCatalog, useDestinationsV2Columns)).collect(Collectors.toList()) + private fun createWriteConfigs( + namingResolver: NamingConventionTransformer, + config: JsonNode, + catalog: ConfiguredAirbyteCatalog, + parsedCatalog: ParsedCatalog, + useDestinationsV2Columns: Boolean + ): List { + return catalog.streams + .stream() + .map(toWriteConfig(namingResolver, config, parsedCatalog, useDestinationsV2Columns)) + .collect(Collectors.toList()) } - private fun toWriteConfig(namingResolver: NamingConventionTransformer, - config: JsonNode, - parsedCatalog: ParsedCatalog, - useDestinationsV2Columns: Boolean): Function { + private fun toWriteConfig( + namingResolver: NamingConventionTransformer, + config: JsonNode, + parsedCatalog: ParsedCatalog, + useDestinationsV2Columns: Boolean + ): Function { return Function { stream: ConfiguredAirbyteStream -> - Preconditions.checkNotNull(stream.destinationSyncMode, "Undefined destination sync mode") + Preconditions.checkNotNull( + stream.destinationSyncMode, + "Undefined destination sync mode" + ) val abStream = stream.stream val streamName = abStream.name val outputSchema: String - val tableName: String + val tableName: String? if (useDestinationsV2Columns) { val streamId = parsedCatalog.getStream(abStream.namespace, streamName).id - outputSchema = streamId.rawNamespace + outputSchema = streamId.rawNamespace!! 
tableName = streamId.rawName } else { - outputSchema = getOutputSchema(abStream, config["schema"].asText(), namingResolver) + outputSchema = + getOutputSchema(abStream, config["schema"].asText(), namingResolver) tableName = namingResolver.getRawTableName(streamName) } val tmpTableName = namingResolver.getTmpTableName(streamName) val syncMode = stream.destinationSyncMode val writeConfig = - WriteConfig(streamName, abStream.namespace, outputSchema, tmpTableName, tableName, syncMode, SYNC_DATETIME) + WriteConfig( + streamName, + abStream.namespace, + outputSchema, + tmpTableName, + tableName, + syncMode, + SYNC_DATETIME + ) LOGGER.info("Write config: {}", writeConfig) writeConfig } } - private fun getOutputSchema(stream: AirbyteStream, - defaultDestSchema: String, - namingResolver: NamingConventionTransformer): String { - return if (stream.namespace != null - ) namingResolver.getNamespace(stream.namespace) + private fun getOutputSchema( + stream: AirbyteStream, + defaultDestSchema: String, + namingResolver: NamingConventionTransformer + ): String { + return if (stream.namespace != null) namingResolver.getNamespace(stream.namespace) else namingResolver.getNamespace(defaultDestSchema) } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt index 32f36ebb7617..c99b55685a63 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestinationTest.kt @@ -18,28 +18,45 @@ import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migrat import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test +import org.mockito.Mockito.mock class AbstractJdbcDestinationTest { private fun buildConfigNoJdbcParameters(): JsonNode { - return Jsons.jsonNode(ImmutableMap.of( - JdbcUtils.HOST_KEY, "localhost", - JdbcUtils.PORT_KEY, 1337, - JdbcUtils.USERNAME_KEY, "user", - JdbcUtils.DATABASE_KEY, "db")) + return Jsons.jsonNode( + ImmutableMap.of( + JdbcUtils.HOST_KEY, + "localhost", + JdbcUtils.PORT_KEY, + 1337, + JdbcUtils.USERNAME_KEY, + "user", + JdbcUtils.DATABASE_KEY, + "db" + ) + ) } private fun buildConfigWithExtraJdbcParameters(extraParam: String): JsonNode { - return Jsons.jsonNode(ImmutableMap.of( - JdbcUtils.HOST_KEY, "localhost", - JdbcUtils.PORT_KEY, 1337, - JdbcUtils.USERNAME_KEY, "user", - JdbcUtils.DATABASE_KEY, "db", - JdbcUtils.JDBC_URL_PARAMS_KEY, extraParam)) + return Jsons.jsonNode( + ImmutableMap.of( + JdbcUtils.HOST_KEY, + "localhost", + JdbcUtils.PORT_KEY, + 1337, + JdbcUtils.USERNAME_KEY, + "user", + JdbcUtils.DATABASE_KEY, + "db", + JdbcUtils.JDBC_URL_PARAMS_KEY, + extraParam + ) + ) } @Test fun testNoExtraParamsNoDefault() { - val connectionProperties = TestJdbcDestination().getConnectionProperties(buildConfigNoJdbcParameters()) + val connectionProperties = + TestJdbcDestination().getConnectionProperties(buildConfigNoJdbcParameters()) val expectedProperties: Map = ImmutableMap.of() Assertions.assertEquals(expectedProperties, connectionProperties) @@ -49,8 +66,9 @@ class AbstractJdbcDestinationTest { fun 
testNoExtraParamsWithDefault() { val defaultProperties: Map = ImmutableMap.of("A_PARAMETER", "A_VALUE") - val connectionProperties = TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigNoJdbcParameters()) + val connectionProperties = + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigNoJdbcParameters()) Assertions.assertEquals(defaultProperties, connectionProperties) } @@ -58,12 +76,11 @@ class AbstractJdbcDestinationTest { @Test fun testExtraParamNoDefault() { val extraParam = "key1=value1&key2=value2&key3=value3" - val connectionProperties = TestJdbcDestination().getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)) - val expectedProperties: Map = ImmutableMap.of( - "key1", "value1", - "key2", "value2", - "key3", "value3") + val connectionProperties = + TestJdbcDestination() + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value3") Assertions.assertEquals(expectedProperties, connectionProperties) } @@ -71,13 +88,20 @@ class AbstractJdbcDestinationTest { fun testExtraParamWithDefault() { val defaultProperties: Map = ImmutableMap.of("A_PARAMETER", "A_VALUE") val extraParam = "key1=value1&key2=value2&key3=value3" - val connectionProperties = TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)) - val expectedProperties: Map = ImmutableMap.of( - "A_PARAMETER", "A_VALUE", - "key1", "value1", - "key2", "value2", - "key3", "value3") + val connectionProperties = + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of( + "A_PARAMETER", + "A_VALUE", + "key1", + "value1", + "key2", + "value2", + "key3", + "value3" + ) Assertions.assertEquals(expectedProperties, connectionProperties) } @@ -85,12 +109,11 @@ class AbstractJdbcDestinationTest { fun testExtraParameterEqualToDefault() { val defaultProperties: Map = ImmutableMap.of("key1", "value1") val extraParam = "key1=value1&key2=value2&key3=value3" - val connectionProperties = TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)) - val expectedProperties: Map = ImmutableMap.of( - "key1", "value1", - "key2", "value2", - "key3", "value3") + val connectionProperties = + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + val expectedProperties: Map = + ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value3") Assertions.assertEquals(expectedProperties, connectionProperties) } @@ -100,19 +123,28 @@ class AbstractJdbcDestinationTest { val extraParam = "key1=value1&key2=value2&key3=value3" Assertions.assertThrows(IllegalArgumentException::class.java) { - TestJdbcDestination(defaultProperties).getConnectionProperties( - buildConfigWithExtraJdbcParameters(extraParam)) + TestJdbcDestination(defaultProperties) + .getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) } } @Test fun testInvalidExtraParam() { val extraParam = "key1=value1&sdf&" - Assertions.assertThrows(ConfigErrorException::class.java - ) { TestJdbcDestination().getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) } + Assertions.assertThrows(ConfigErrorException::class.java) { + TestJdbcDestination() + 
.getConnectionProperties(buildConfigWithExtraJdbcParameters(extraParam)) + } } - internal class TestJdbcDestination @JvmOverloads constructor(private val defaultProperties: Map = HashMap()) : AbstractJdbcDestination("", StandardNameTransformer(), TestJdbcSqlOperations()) { + internal class TestJdbcDestination + @JvmOverloads + constructor(private val defaultProperties: Map = HashMap()) : + AbstractJdbcDestination( + "", + StandardNameTransformer(), + TestJdbcSqlOperations() + ) { override fun getDefaultConnectionProperties(config: JsonNode): Map { return defaultProperties } @@ -121,20 +153,26 @@ class AbstractJdbcDestinationTest { return config } - override fun getSqlGenerator(): JdbcSqlGenerator { - // TODO do we need to populate this? - return null - } + override val sqlGenerator: JdbcSqlGenerator = mock() - override fun getDestinationHandler(databaseName: String, database: JdbcDatabase, rawTableSchema: String): JdbcDestinationHandler? { - return null + override fun getDestinationHandler( + databaseName: String, + database: JdbcDatabase, + rawTableSchema: String + ): JdbcDestinationHandler { + return mock() } - protected override fun getMigrations(database: JdbcDatabase, - databaseName: String, - sqlGenerator: SqlGenerator, - destinationHandler: DestinationHandler): List> { + override fun getMigrations( + database: JdbcDatabase, + databaseName: String, + sqlGenerator: SqlGenerator, + destinationHandler: DestinationHandler + ): List> { return emptyList() } + + public override fun getConnectionProperties(config: JsonNode): Map = + super.getConnectionProperties(config) } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt index fcf532ea8265..0f188975a419 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/DataAdapterTest.kt @@ -5,12 +5,15 @@ package io.airbyte.cdk.integrations.destination.jdbc import com.fasterxml.jackson.databind.JsonNode import io.airbyte.commons.json.Jsons +import java.util.function.Function import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test -import java.util.function.Function internal class DataAdapterTest { - private val testData: JsonNode = Jsons.deserialize("{\"attr1\" : \"CCC\", \"obj1\" : [{\"sub1\" : \"BBB\"}, {\"sub1\" : \"CCC\"}]}") + private val testData: JsonNode = + Jsons.deserialize( + "{\"attr1\" : \"CCC\", \"obj1\" : [{\"sub1\" : \"BBB\"}, {\"sub1\" : \"CCC\"}]}" + ) private val replaceCCCFunction = Function { jsonNode: JsonNode -> if (jsonNode.isTextual) { val textValue = jsonNode.textValue().replace("CCC".toRegex(), "FFF") @@ -30,7 +33,13 @@ internal class DataAdapterTest { @Test fun checkSkip() { val data = testData.deepCopy() - val adapter = DataAdapter({ jsonNode: JsonNode -> jsonNode.isTextual && jsonNode.textValue().contains("BBB") }, replaceCCCFunction) + val adapter = + DataAdapter( + { jsonNode: JsonNode -> + jsonNode.isTextual && jsonNode.textValue().contains("BBB") + }, + replaceCCCFunction + ) adapter.adapt(data) Assertions.assertEquals(testData, data) @@ -39,12 +48,26 @@ internal class DataAdapterTest { @Test fun checkAdapt() { val data = testData.deepCopy() - val adapter = DataAdapter({ jsonNode: JsonNode -> 
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt
index e0dc2fe95710..75c32ceba400 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/TestJdbcSqlOperations.kt
@@ -5,25 +5,29 @@ package io.airbyte.cdk.integrations.destination.jdbc
 
 import io.airbyte.cdk.db.jdbc.JdbcDatabase
 import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage
+import java.sql.SQLException
 import org.junit.jupiter.api.Assertions
 import org.junit.jupiter.api.Test
 import org.mockito.Mockito
-import java.sql.SQLException
 
 class TestJdbcSqlOperations : JdbcSqlOperations() {
     @Throws(Exception::class)
-    public override fun insertRecordsInternal(database: JdbcDatabase,
-                                              records: List<PartialAirbyteMessage>,
-                                              schemaName: String,
-                                              tableName: String) {
+    public override fun insertRecordsInternal(
+        database: JdbcDatabase,
+        records: List<PartialAirbyteMessage>,
+        schemaName: String?,
+        tableName: String?
+    ) {
         // Not required for the testing
     }
 
     @Throws(Exception::class)
-    override fun insertRecordsInternalV2(database: JdbcDatabase,
-                                         records: List<PartialAirbyteMessage>,
-                                         schemaName: String,
-                                         tableName: String) {
+    override fun insertRecordsInternalV2(
+        database: JdbcDatabase,
+        records: List<PartialAirbyteMessage>,
+        schemaName: String?,
+        tableName: String?
+    ) {
         // Not required for the testing
     }
 
@@ -34,10 +38,14 @@ class TestJdbcSqlOperations : JdbcSqlOperations() {
         try {
             Mockito.doThrow(SQLException("TEST")).`when`(db).execute(Mockito.anyString())
         } catch (e: Exception) {
-            // This would not be expected, but the `execute` method above will flag as an unhandled exception
+            // This would not be expected, but the `execute` method above will flag as an unhandled
+            // exception
            assert(false)
         }
-        val exception = Assertions.assertThrows(SQLException::class.java) { createSchemaIfNotExists(db, schemaName) }
+        val exception =
+            Assertions.assertThrows(SQLException::class.java) {
+                createSchemaIfNotExists(db, schemaName)
+            }
         Assertions.assertEquals(exception.message, "TEST")
     }
 }
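Review note: the schema-failure hunk above is mostly indentation, but the pattern it reformats is worth seeing in isolation: stub the database to fail, then assert the `SQLException` propagates instead of being swallowed. A minimal sketch against a hypothetical `Database` interface (not the CDK's `JdbcDatabase`):

```kotlin
import java.sql.SQLException
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import org.mockito.Mockito
import org.mockito.kotlin.mock

// Illustrative interface standing in for the real JDBC wrapper.
interface Database {
    @Throws(SQLException::class) fun execute(sql: String)
}

class SchemaCreationTest {
    private fun createSchemaIfNotExists(db: Database, schema: String) {
        db.execute("CREATE SCHEMA IF NOT EXISTS $schema")
    }

    @Test
    fun failureBubblesUp() {
        val db = mock<Database>()
        // Arrange the mock to fail, then assert the exception is not swallowed.
        Mockito.doThrow(SQLException("TEST")).`when`(db).execute(Mockito.anyString())
        val e =
            Assertions.assertThrows(SQLException::class.java) {
                createSchemaIfNotExists(db, "public")
            }
        Assertions.assertEquals("TEST", e.message)
    }
}
```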
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt
index ceac1d1b43a5..214d181fab2b 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/SwitchingDestinationTest.kt
@@ -9,9 +9,9 @@ import io.airbyte.cdk.integrations.base.Destination
 import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
 import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.Test
-import org.mockito.ArgumentMatchers
 import org.mockito.Mockito
-import java.util.function.Consumer
+import org.mockito.Mockito.mock
+import org.mockito.kotlin.any
 
 internal class SwitchingDestinationTest {
     internal enum class SwitchingEnum {
@@ -19,48 +19,70 @@ internal class SwitchingDestinationTest {
         COPY
     }
 
-    private var insertDestination: Destination? = null
-    private var copyDestination: Destination? = null
-    private var destinationMap: Map<SwitchingEnum, Destination>? = null
+    private lateinit var insertDestination: Destination
+    private lateinit var copyDestination: Destination
+    private lateinit var destinationMap: Map<SwitchingEnum, Destination>
 
     @BeforeEach
     fun setUp() {
         insertDestination = Mockito.mock(Destination::class.java)
         copyDestination = Mockito.mock(Destination::class.java)
-        destinationMap = ImmutableMap.of(
-            SwitchingEnum.INSERT, insertDestination,
-            SwitchingEnum.COPY, copyDestination)
+        destinationMap =
+            ImmutableMap.of(
+                SwitchingEnum.INSERT,
+                insertDestination,
+                SwitchingEnum.COPY,
+                copyDestination
+            )
     }
 
     @Test
     @Throws(Exception::class)
     fun testInsert() {
-        val switchingDestination = SwitchingDestination(SwitchingEnum::class.java, { c: JsonNode? -> SwitchingEnum.INSERT }, destinationMap)
+        val switchingDestination =
+            SwitchingDestination(
+                SwitchingEnum::class.java,
+                { c: JsonNode? -> SwitchingEnum.INSERT },
+                destinationMap
+            )
 
-        switchingDestination.getConsumer(Mockito.mock(JsonNode::class.java), Mockito.mock(ConfiguredAirbyteCatalog::class.java), Mockito.mock(Consumer::class.java))
+        switchingDestination.getConsumer(
+            Mockito.mock(JsonNode::class.java),
+            Mockito.mock(ConfiguredAirbyteCatalog::class.java),
+            mock()
+        )
 
-        Mockito.verify(insertDestination, Mockito.times(1)).getConsumer(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())
-        Mockito.verify(copyDestination, Mockito.times(0)).getConsumer(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())
+        Mockito.verify(insertDestination, Mockito.times(1)).getConsumer(any(), any(), any())
+        Mockito.verify(copyDestination, Mockito.times(0)).getConsumer(any(), any(), any())
 
         switchingDestination.check(Mockito.mock(JsonNode::class.java))
 
-        Mockito.verify(insertDestination, Mockito.times(1)).check(ArgumentMatchers.any())
-        Mockito.verify(copyDestination, Mockito.times(0)).check(ArgumentMatchers.any())
+        Mockito.verify(insertDestination, Mockito.times(1)).check(any())
+        Mockito.verify(copyDestination, Mockito.times(0)).check(any())
     }
 
     @Test
     @Throws(Exception::class)
     fun testCopy() {
-        val switchingDestination = SwitchingDestination(SwitchingEnum::class.java, { c: JsonNode? -> SwitchingEnum.COPY }, destinationMap)
+        val switchingDestination =
+            SwitchingDestination(
+                SwitchingEnum::class.java,
+                { c: JsonNode? -> SwitchingEnum.COPY },
+                destinationMap
+            )
 
-        switchingDestination.getConsumer(Mockito.mock(JsonNode::class.java), Mockito.mock(ConfiguredAirbyteCatalog::class.java), Mockito.mock(Consumer::class.java))
+        switchingDestination.getConsumer(
+            Mockito.mock(JsonNode::class.java),
+            Mockito.mock(ConfiguredAirbyteCatalog::class.java),
+            Mockito.mock()
+        )
 
-        Mockito.verify(insertDestination, Mockito.times(0)).getConsumer(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())
-        Mockito.verify(copyDestination, Mockito.times(1)).getConsumer(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())
+        Mockito.verify(insertDestination, Mockito.times(0)).getConsumer(any(), any(), any())
+        Mockito.verify(copyDestination, Mockito.times(1)).getConsumer(any(), any(), any())
 
         switchingDestination.check(Mockito.mock(JsonNode::class.java))
 
-        Mockito.verify(insertDestination, Mockito.times(0)).check(ArgumentMatchers.any())
-        Mockito.verify(copyDestination, Mockito.times(1)).check(ArgumentMatchers.any())
+        Mockito.verify(insertDestination, Mockito.times(0)).check(any())
+        Mockito.verify(copyDestination, Mockito.times(1)).check(any())
     }
 }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt
index a5fef9bf74a0..5684804c0ba7 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/destination/staging/SerialStagingConsumerFactoryTest.kt
@@ -5,29 +5,46 @@ package io.airbyte.cdk.integrations.destination.staging
 
 import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig
 import io.airbyte.commons.exceptions.ConfigErrorException
+import java.util.List
 import org.junit.jupiter.api.Assertions
 import org.junit.jupiter.api.Test
-import java.util.List
+import org.mockito.Mockito.mock
 
 internal class SerialStagingConsumerFactoryTest {
     @Test
     fun detectConflictingStreams() {
-        val configErrorException = Assertions.assertThrows(
-            ConfigErrorException::class.java
-        ) {
-            SerialFlush.function(
-                null,
+        val configErrorException =
+            Assertions.assertThrows(ConfigErrorException::class.java) {
+                SerialFlush.function(
                     null,
+                    mock(),
                     List.of(
-                        WriteConfig("example_stream", "source_schema", "destination_default_schema", null, null, null),
-                        WriteConfig("example_stream", "source_schema", "destination_default_schema", null, null, null)),
-                    null,
-                    null,
-                    null)
-        }
+                        WriteConfig(
+                            "example_stream",
+                            "source_schema",
+                            "destination_default_schema",
+                            null,
+                            null,
+                            mock()
+                        ),
+                        WriteConfig(
+                            "example_stream",
+                            "source_schema",
+                            "destination_default_schema",
+                            null,
+                            null,
+                            mock()
+                        )
+                    ),
+                    mock(),
+                    mock(),
+                    mock()
+                )
+            }
 
         Assertions.assertEquals(
-            "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: source_schema.example_stream, source_schema.example_stream",
-            configErrorException.message)
+            "You are trying to write multiple streams to the same table. Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: source_schema.example_stream, source_schema.example_stream",
+            configErrorException.message
+        )
     }
 }
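Review note: the matcher swap in these two files (`ArgumentMatchers.any()` → `org.mockito.kotlin.any()`, and `Mockito.mock(Consumer::class.java)` → `mock()`) is not cosmetic. Mockito's matcher returns `null`, which trips Kotlin's intrinsic null check the moment it is passed to a parameter declared non-nullable, while mockito-kotlin's `any()` substitutes a non-null placeholder. A toy reproduction, with an illustrative interface:

```kotlin
import org.mockito.kotlin.any
import org.mockito.kotlin.mock
import org.mockito.kotlin.verify

// Illustrative interface with a non-nullable parameter.
interface Greeter {
    fun greet(name: String): String
}

fun main() {
    val greeter = mock<Greeter>()
    greeter.greet("world")
    // org.mockito.ArgumentMatchers.any() would hand `null` to the non-nullable
    // `name` parameter and fail at the call site; mockito-kotlin's any()
    // supplies a non-null dummy, so verification against a Kotlin signature is safe.
    verify(greeter).greet(any())
}
```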
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt
index 6e0999c70b01..e3f882029b25 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/test/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespacesTest.kt
@@ -3,35 +3,53 @@
  */
 package io.airbyte.cdk.integrations.standardtest.destination
 
-import org.junit.jupiter.api.Assertions
-import org.junit.jupiter.api.Test
 import java.time.Instant
 import java.time.ZoneId
 import java.time.format.DateTimeFormatter
 import java.time.temporal.ChronoUnit
+import org.junit.jupiter.api.Assertions
+import org.junit.jupiter.api.Test
 
 internal class TestingNamespacesTest {
     @Test
     fun testGenerate() {
-        val namespace = TestingNamespaces.generate().split("_".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()
+        val namespace =
+            TestingNamespaces.generate()
+                .split("_".toRegex())
+                .dropLastWhile { it.isEmpty() }
+                .toTypedArray()
         Assertions.assertEquals("test", namespace[0])
-        Assertions.assertEquals(FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), namespace[1])
+        Assertions.assertEquals(
+            FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()),
+            namespace[1]
+        )
         Assertions.assertFalse(namespace[2].isBlank())
     }
 
     @Test
     fun testGenerateWithPrefix() {
-        val namespace = TestingNamespaces.generate("myprefix").split("_".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()
+        val namespace =
+            TestingNamespaces.generate("myprefix")
+                .split("_".toRegex())
+                .dropLastWhile { it.isEmpty() }
+                .toTypedArray()
         Assertions.assertEquals("myprefix", namespace[0])
         Assertions.assertEquals("test", namespace[1])
-        Assertions.assertEquals(FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()), namespace[2])
+        Assertions.assertEquals(
+            FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate()),
+            namespace[2]
+        )
         Assertions.assertFalse(namespace[3].isBlank())
     }
 
     @Test
     fun testIsOlderThan2Days() {
-        Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(0) + "_12345"))
-        Assertions.assertTrue(TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(2) + "_12345"))
+        Assertions.assertFalse(
+            TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(0) + "_12345")
+        )
+        Assertions.assertTrue(
+            TestingNamespaces.isOlderThan2Days("myprefix_test_" + getDate(2) + "_12345")
+        )
     }
 
     @Test
@@ -39,14 +57,21 @@ internal class TestingNamespacesTest {
         Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("12345"))
         Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("test_12345"))
         Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("hello_test_12345"))
-        Assertions.assertFalse(TestingNamespaces.isOlderThan2Days("myprefix_test1_" + getDate(2) + "_12345"))
+        Assertions.assertFalse(
+            TestingNamespaces.isOlderThan2Days("myprefix_test1_" + getDate(2) + "_12345")
+        )
     }
 
     companion object {
         private val FORMATTER: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")
 
         private fun getDate(daysAgo: Int): String {
-            return FORMATTER.format(Instant.now().minus(daysAgo.toLong(), ChronoUnit.DAYS).atZone(ZoneId.of("UTC")).toLocalDate())
+            return FORMATTER.format(
+                Instant.now()
+                    .minus(daysAgo.toLong(), ChronoUnit.DAYS)
+                    .atZone(ZoneId.of("UTC"))
+                    .toLocalDate()
+            )
         }
     }
 }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java
deleted file mode 100644
index e69de29bb2d1..000000000000
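Review note: for readers outside the codebase, the scheme these tests pin down is `[prefix_]test_<yyyyMMdd>_<suffix>`, with anything stamped two or more days ago considered sweepable. A standalone sketch of `generate`/`isOlderThan2Days` logic consistent with the assertions above; the CDK's actual implementation may differ in details:

```kotlin
import java.time.Instant
import java.time.LocalDate
import java.time.ZoneId
import java.time.format.DateTimeFormatter

private val FORMATTER: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")

// Produces [prefix_]test_<yyyyMMdd>_<suffix>.
fun generate(prefix: String? = null): String {
    val date = FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC")).toLocalDate())
    return listOfNotNull(prefix, "test", date, System.nanoTime().toString()).joinToString("_")
}

fun isOlderThan2Days(namespace: String): Boolean {
    val parts = namespace.split("_")
    val dateIdx = parts.indexOf("test") + 1 // the date sits right after the "test" marker
    if (dateIdx == 0 || dateIdx >= parts.size) return false // no marker: never sweep
    val date =
        runCatching { LocalDate.parse(parts[dateIdx], FORMATTER) }.getOrNull()
            ?: return false // unparsable date: never sweep
    return !date.isAfter(LocalDate.now(ZoneId.of("UTC")).minusDays(2))
}

fun main() {
    val ns = generate("myprefix")
    println(ns) // e.g. myprefix_test_20240314_123456789
    println(isOlderThan2Days(ns)) // false: stamped today
}
```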
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt
index 0b58c467f5bb..8fab95caa21e 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTest.kt
@@ -9,6 +9,7 @@ import com.google.common.collect.ImmutableMap
 import com.google.common.collect.Lists
 import com.google.common.collect.Sets
 import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
+import io.airbyte.cdk.integrations.standardtest.destination.*
 import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataArgumentsProvider
 import io.airbyte.cdk.integrations.standardtest.destination.argproviders.DataTypeTestArgumentProvider
 import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil
@@ -20,12 +21,23 @@ import io.airbyte.commons.json.Jsons
 import io.airbyte.commons.lang.Exceptions
 import io.airbyte.commons.resources.MoreResources
 import io.airbyte.commons.util.MoreIterators
-import io.airbyte.configoss.*
-import io.airbyte.protocol.models.AirbyteMessage
-import io.airbyte.protocol.models.ConfiguredAirbyteCatalog
+import io.airbyte.configoss.JobGetSpecConfig
+import io.airbyte.configoss.OperatorDbt
+import io.airbyte.configoss.StandardCheckConnectionInput
+import io.airbyte.configoss.StandardCheckConnectionOutput
+import io.airbyte.configoss.StandardCheckConnectionOutput.Status
+import io.airbyte.configoss.WorkerDestinationConfig
 import io.airbyte.protocol.models.Field
 import io.airbyte.protocol.models.JsonSchemaType
-import io.airbyte.protocol.models.v0.*
+import io.airbyte.protocol.models.v0.AirbyteCatalog
+import io.airbyte.protocol.models.v0.AirbyteMessage
+import io.airbyte.protocol.models.v0.AirbyteMessage.Type
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage
+import io.airbyte.protocol.models.v0.AirbyteStateMessage
+import io.airbyte.protocol.models.v0.AirbyteStream
+import io.airbyte.protocol.models.v0.CatalogHelpers
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
+import io.airbyte.protocol.models.v0.ConnectorSpecification
 import io.airbyte.protocol.models.v0.DestinationSyncMode
 import io.airbyte.protocol.models.v0.SyncMode
 import io.airbyte.workers.exception.TestHarnessException
@@ -41,17 +53,6 @@ import io.airbyte.workers.normalization.NormalizationRunner
 import io.airbyte.workers.process.AirbyteIntegrationLauncher
 import io.airbyte.workers.process.DockerProcessFactory
 import io.airbyte.workers.process.ProcessFactory
-import lombok.Builder
-import lombok.Getter
-import org.junit.jupiter.api.*
-import org.junit.jupiter.api.extension.ExtensionContext
-import org.junit.jupiter.params.ParameterizedTest
-import org.junit.jupiter.params.provider.Arguments
-import org.junit.jupiter.params.provider.ArgumentsProvider
-import org.junit.jupiter.params.provider.ArgumentsSource
-import org.mockito.Mockito
-import org.slf4j.Logger
-import org.slf4j.LoggerFactory
 import java.io.IOException
 import java.io.UncheckedIOException
 import java.net.URISyntaxException
@@ -60,24 +61,37 @@ import java.nio.file.Path
 import java.time.Instant
 import java.util.*
 import java.util.concurrent.atomic.AtomicInteger
-import java.util.function.BinaryOperator
 import java.util.function.Consumer
-import java.util.function.Function
-import java.util.function.Predicate
 import java.util.stream.Collectors
 import java.util.stream.Stream
+import kotlin.Comparator
+import kotlin.collections.ArrayList
+import kotlin.collections.HashSet
+import org.junit.jupiter.api.*
+import org.junit.jupiter.api.extension.ExtensionContext
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.Arguments
+import org.junit.jupiter.params.provider.ArgumentsProvider
+import org.junit.jupiter.params.provider.ArgumentsSource
+import org.mockito.Mockito
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
 
 abstract class DestinationAcceptanceTest {
-    protected var TEST_SCHEMAS: HashSet<String>? = null
+    protected lateinit var TEST_SCHEMAS: HashSet<String>
 
-    private var testEnv: TestDestinationEnv? = null
+    private lateinit var testEnv: TestDestinationEnv
 
     private var jobRoot: Path? = null
     private var processFactory: ProcessFactory? = null
     private var mConnectorConfigUpdater: ConnectorConfigUpdater? = null
 
     protected var localRoot: Path? = null
-    protected var testDataComparator: TestDataComparator = getTestDataComparator()
+    open protected var _testDataComparator: TestDataComparator = getTestDataComparator()
+
+    open fun getTestDataComparator(): TestDataComparator {
+        return BasicTestDataComparator { this.resolveIdentifier(it) }
+    }
 
     protected abstract val imageName: String
         /**
@@ -99,7 +113,10 @@ abstract class DestinationAcceptanceTest {
     }
 
     private val imageNameWithoutTag: String
-        get() = if (imageName.contains(":")) imageName.split(":".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()[0] else imageName
+        get() =
+            if (imageName.contains(":"))
+                imageName.split(":".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()[0]
+            else imageName
 
     private fun readMetadata(): JsonNode {
         return try {
@@ -118,62 +135,65 @@ abstract class DestinationAcceptanceTest {
         get() {
             val metadata = readMetadata()["data"] ?: return null
             val normalizationConfig = metadata["normalizationConfig"] ?: return null
-            val normalizationRepository = normalizationConfig["normalizationRepository"] ?: return null
+            val normalizationRepository =
+                normalizationConfig["normalizationRepository"] ?: return null
             return normalizationRepository.asText() + ":" + NORMALIZATION_VERSION
         }
 
-    @get:Throws(Exception::class)
-    protected abstract val config: JsonNode
-        /**
-         * Configuration specific to the integration. Will be passed to integration where appropriate in
-         * each test. Should be valid.
-         *
-         * @return integration-specific configuration
-         */
-        get
+    /**
+     * Configuration specific to the integration. Will be passed to integration where appropriate in
+     * each test. Should be valid.
+     *
+     * @return integration-specific configuration
+     */
+    @Throws(Exception::class) protected abstract fun getConfig(): JsonNode
 
-    @get:Throws(Exception::class)
-    protected abstract val failCheckConfig: JsonNode?
-        /**
-         * Configuration specific to the integration. Will be passed to integration where appropriate in
-         * tests that test behavior when configuration is invalid. e.g incorrect password. Should be
-         * invalid.
-         *
-         * @return integration-specific configuration
-         */
-        get
+    /**
+     * Configuration specific to the integration. Will be passed to integration where appropriate in
+     * tests that test behavior when configuration is invalid. e.g incorrect password. Should be
+     * invalid.
+     *
+     * @return integration-specific configuration
+     */
+    @Throws(Exception::class) protected abstract fun getFailCheckConfig(): JsonNode?
 
     /**
      * Function that returns all of the records in destination as json at the time this method is
-     * invoked. These will be used to check that the data actually written is what should actually be
-     * there. Note: this returns a set and does not test any order guarantees.
+     * invoked. These will be used to check that the data actually written is what should actually
+     * be there. Note: this returns a set and does not test any order guarantees.
      *
-     * @param testEnv - information about the test environment.
-     * @param streamName - name of the stream for which we are retrieving records.
-     * @param namespace - the destination namespace records are located in. Null if not applicable.
-     * Usually a JDBC schema.
-     * @param streamSchema - schema of the stream to be retrieved. This is only necessary for
-     * destinations in which data types cannot be accurately inferred (e.g. in CSV destination,
-     * every value is a string).
+     * @param testEnv
+     * - information about the test environment.
+     * @param streamName
+     * - name of the stream for which we are retrieving records.
+     * @param namespace
+     * - the destination namespace records are located in. Null if not applicable. Usually a JDBC
+     * schema.
+     * @param streamSchema
+     * - schema of the stream to be retrieved. This is only necessary for destinations in which data
+     * types cannot be accurately inferred (e.g. in CSV destination, every value is a string).
      * @return All of the records in the destination at the time this method is invoked.
-     * @throws Exception - can throw any exception, test framework will handle.
+     * @throws Exception
+     * - can throw any exception, test framework will handle.
     */
    @Throws(Exception::class)
-    protected abstract fun retrieveRecords(testEnv: TestDestinationEnv?,
-                                           streamName: String?,
-                                           namespace: String?,
-                                           streamSchema: JsonNode?): List<JsonNode>
+    protected abstract fun retrieveRecords(
+        testEnv: TestDestinationEnv?,
+        streamName: String?,
+        namespace: String?,
+        streamSchema: JsonNode
+    ): List<JsonNode>
 
    /**
-     * Returns a destination's default schema. The default implementation assumes this corresponds to
-     * the configuration's 'schema' field, as this is how most of our destinations implement this.
-     * Destinations are free to appropriately override this. The return value is used to assert
-     * correctness.
-     *
+     * Returns a destination's default schema. The default implementation assumes this corresponds
+     * to the configuration's 'schema' field, as this is how most of our destinations implement
+     * this. Destinations are free to appropriately override this. The return value is used to
+     * assert correctness.
     *
     * If not applicable, Destinations are free to ignore this.
     *
-     * @param config - integration-specific configuration returned by [.getConfig].
+     * @param config
+     * - integration-specific configuration returned by [.getConfig].
     * @return the default schema, if applicatble.
     */
    @Throws(Exception::class)
@@ -197,7 +217,8 @@ abstract class DestinationAcceptanceTest {
     * Detects if a destination implements append mode from the spec.json that should include
     * 'supportsIncremental' = true
     *
-     * @return - a boolean.
+     * @return
+     * - a boolean.
     */
    @Throws(TestHarnessException::class)
    protected fun implementsAppend(): Boolean {
@@ -213,7 +234,8 @@ abstract class DestinationAcceptanceTest {
     protected fun normalizationFromDefinition(): Boolean {
         val metadata = readMetadata()["data"] ?: return false
         val normalizationConfig = metadata["normalizationConfig"] ?: return false
-        return normalizationConfig.has("normalizationRepository") && normalizationConfig.has("normalizationTag")
+        return normalizationConfig.has("normalizationRepository") &&
+            normalizationConfig.has("normalizationTag")
     }
 
     protected fun dbtFromDefinition(): Boolean {
@@ -229,7 +251,8 @@ abstract class DestinationAcceptanceTest {
         get() {
             val metadata = readMetadata()["data"] ?: return null
             val normalizationConfig = metadata["normalizationConfig"] ?: return null
-            val normalizationIntegrationType = normalizationConfig["normalizationIntegrationType"] ?: return null
+            val normalizationIntegrationType =
+                normalizationConfig["normalizationIntegrationType"] ?: return null
             return normalizationIntegrationType.asText()
         }
 
@@ -237,7 +260,8 @@ abstract class DestinationAcceptanceTest {
     * Detects if a destination implements append dedup mode from the spec.json that should include
     * 'supportedDestinationSyncMode'
     *
-     * @return - a boolean.
+     * @return
+     * - a boolean.
     */
    @Throws(TestHarnessException::class)
    protected fun implementsAppendDedup(): Boolean {
@@ -254,7 +278,8 @@ abstract class DestinationAcceptanceTest {
     * Detects if a destination implements overwrite mode from the spec.json that should include
     * 'supportedDestinationSyncMode'
     *
-     * @return - a boolean.
+     * @return
+     * - a boolean.
     */
    @Throws(TestHarnessException::class)
    protected fun implementsOverwrite(): Boolean {
@@ -268,50 +293,60 @@ abstract class DestinationAcceptanceTest {
     }
 
     /**
-     * Same idea as [.retrieveRecords]. Except this
-     * method should pull records from the table that contains the normalized records and convert them
-     * back into the data as it would appear in an [AirbyteRecordMessage]. Only need to override
-     * this method if [.normalizationFromDefinition] returns true.
+     * Same idea as [.retrieveRecords]. Except this method should pull records from the table that
+     * contains the normalized records and convert them back into the data as it would appear in an
+     * [AirbyteRecordMessage]. Only need to override this method if [.normalizationFromDefinition]
+     * returns true.
      *
-     * @param testEnv - information about the test environment.
-     * @param streamName - name of the stream for which we are retrieving records.
-     * @param namespace - the destination namespace records are located in. Null if not applicable.
-     * Usually a JDBC schema.
+     * @param testEnv
+     * - information about the test environment.
+     * @param streamName
+     * - name of the stream for which we are retrieving records.
+     * @param namespace
+     * - the destination namespace records are located in. Null if not applicable. Usually a JDBC
+     * schema.
      * @return All of the records in the destination at the time this method is invoked.
-     * @throws Exception - can throw any exception, test framework will handle.
+     * @throws Exception
+     * - can throw any exception, test framework will handle.
     */
    @Throws(Exception::class)
-    protected fun retrieveNormalizedRecords(testEnv: TestDestinationEnv?,
-                                            streamName: String?,
-                                            namespace: String?): List<JsonNode> {
+    protected fun retrieveNormalizedRecords(
+        testEnv: TestDestinationEnv?,
+        streamName: String?,
+        namespace: String?
+    ): List<JsonNode> {
         throw IllegalStateException("Not implemented")
     }
 
     /**
-     * Function that performs any setup of external resources required for the test. e.g. instantiate a
-     * postgres database. This function will be called before EACH test.
+     * Function that performs any setup of external resources required for the test. e.g.
+     * instantiate a postgres database. This function will be called before EACH test.
      *
-     * @param testEnv - information about the test environment.
+     * @param testEnv
+     * - information about the test environment.
     * @param TEST_SCHEMAS
-     * @throws Exception - can throw any exception, test framework will handle.
+     * @throws Exception
+     * - can throw any exception, test framework will handle.
     */
    @Throws(Exception::class)
-    protected abstract fun setup(testEnv: TestDestinationEnv?, TEST_SCHEMAS: HashSet<String>?)
+    protected abstract fun setup(testEnv: TestDestinationEnv, TEST_SCHEMAS: HashSet<String>)
 
    /**
-     * Function that performs any clean up of external resources required for the test. e.g. delete a
-     * postgres database. This function will be called after EACH test. It MUST remove all data in the
-     * destination so that there is no contamination across tests.
+     * Function that performs any clean up of external resources required for the test. e.g. delete
+     * a postgres database. This function will be called after EACH test. It MUST remove all data in
+     * the destination so that there is no contamination across tests.
      *
-     * @param testEnv - information about the test environment.
-     * @throws Exception - can throw any exception, test framework will handle.
+     * @param testEnv
+     * - information about the test environment.
+     * @throws Exception
+     * - can throw any exception, test framework will handle.
     */
-    @Throws(Exception::class)
-    protected abstract fun tearDown(testEnv: TestDestinationEnv?)
-
+    @Throws(Exception::class) protected abstract fun tearDown(testEnv: TestDestinationEnv)
 
-    @Deprecated("""This method is moved to the AdvancedTestDataComparator. Please move your destination
-    implementation of the method to your comparator implementation.""")
+    @Deprecated(
+        """This method is moved to the AdvancedTestDataComparator. Please move your destination
    implementation of the method to your comparator implementation."""
+    )
     protected fun resolveIdentifier(identifier: String?): List<String?> {
         return java.util.List.of(identifier)
     }
@@ -331,12 +366,14 @@ abstract class DestinationAcceptanceTest {
         TEST_SCHEMAS = HashSet()
         setup(testEnv, TEST_SCHEMAS)
 
-        processFactory = DockerProcessFactory(
+        processFactory =
+            DockerProcessFactory(
                 workspaceRoot,
                 workspaceRoot.toString(),
                 localRoot.toString(),
                 "host",
-                emptyMap())
+                emptyMap()
+            )
     }
 
     @AfterEach
@@ -345,9 +382,7 @@ abstract class DestinationAcceptanceTest {
         tearDown(testEnv)
     }
 
-    /**
-     * Verify that when the integrations returns a valid spec.
-     */
+    /** Verify that when the integrations returns a valid spec. */
     @Test
     @Throws(TestHarnessException::class)
     fun testGetSpec() {
@@ -361,7 +396,10 @@ abstract class DestinationAcceptanceTest {
     @Test
     @Throws(Exception::class)
     fun testCheckConnection() {
-        Assertions.assertEquals(StandardCheckConnectionOutput.Status.SUCCEEDED, runCheck(config).status)
+        Assertions.assertEquals(
+            StandardCheckConnectionOutput.Status.SUCCEEDED,
+            runCheck(getConfig()).status
+        )
     }
 
     /**
@@ -371,7 +409,10 @@ abstract class DestinationAcceptanceTest {
     @Test
     @Throws(Exception::class)
     fun testCheckConnectionInvalidCredentials() {
-        Assertions.assertEquals(StandardCheckConnectionOutput.Status.FAILED, runCheck(failCheckConfig).status)
+        Assertions.assertEquals(
+            StandardCheckConnectionOutput.Status.FAILED,
+            runCheck(getFailCheckConfig()).status
+        )
     }
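Review note: the recurring pattern in this hunk is `@get:Throws(Exception::class) protected abstract val config` becoming `@Throws(Exception::class) protected abstract fun getConfig()`. One plausible reading of the motivation: an accessor that may do I/O and throw on every call reads better as a function, while the Java-facing signature (`JsonNode getConfig() throws Exception`) is identical either way. Side by side, as a sketch:

```kotlin
import com.fasterxml.jackson.databind.JsonNode

abstract class PropertyStyle {
    // Property form: the @get: use-site target puts the checked exception on the getter.
    @get:Throws(Exception::class) protected abstract val config: JsonNode
}

abstract class FunctionStyle {
    // Function form: same Java signature, but the call site (`getConfig()`) makes the
    // potentially throwing, potentially expensive access explicit.
    @Throws(Exception::class) protected abstract fun getConfig(): JsonNode
}
```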
     /**
@@ -382,56 +423,75 @@ abstract class DestinationAcceptanceTest {
     @ArgumentsSource(DataArgumentsProvider::class)
     @Throws(Exception::class)
     fun testSync(messagesFilename: String?, catalogFilename: String?) {
-        val catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename),
-            AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        val messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(messagesFilename).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-
-        val config = config
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(catalogFilename),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            MoreResources.readResource(messagesFilename)
+                .lines()
+                .map {
+                    Jsons.deserialize(it, io.airbyte.protocol.models.v0.AirbyteMessage::class.java)
+                }
+                .toList()
+
+        val config = getConfig()
         val defaultSchema = getDefaultSchema(config)
         runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false)
         retrieveRawRecordsAndAssertSameMessages(catalog, messages, defaultSchema)
     }
 
     /**
-     * This serves to test MSSQL 2100 limit parameters in a single query. this means that for Airbyte
-     * insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 params
+     * This serves to test MSSQL 2100 limit parameters in a single query. this means that for
+     * Airbyte insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100
+     * params
     */
    @ParameterizedTest
    @ArgumentsSource(DataArgumentsProvider::class)
    @Throws(Exception::class)
-    fun testSyncWithLargeRecordBatch(messagesFilename: String?,
-                                     catalogFilename: String?) {
-        val catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename),
-            AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        val messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(messagesFilename).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-
-        val largeNumberRecords = Collections
-            .nCopies(400, messages)
+    fun testSyncWithLargeRecordBatch(messagesFilename: String?, catalogFilename: String?) {
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(catalogFilename),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            MoreResources.readResource(messagesFilename)
+                .lines()
+                .map {
+                    Jsons.deserialize(it, io.airbyte.protocol.models.v0.AirbyteMessage::class.java)
+                }
+                .toList()
+
+        val largeNumberRecords =
+            Collections.nCopies(400, messages)
                 .stream()
-                .flatMap { obj: List<io.airbyte.protocol.models.v0.AirbyteMessage> -> obj.stream() } // regroup messages per stream
-                .sorted(Comparator
-                    .comparing { obj: io.airbyte.protocol.models.v0.AirbyteMessage -> obj.type }
+                .flatMap { obj: List<io.airbyte.protocol.models.v0.AirbyteMessage> ->
+                    obj.stream()
+                } // regroup messages per stream
+                .sorted(
+                    Comparator.comparing { obj: io.airbyte.protocol.models.v0.AirbyteMessage ->
+                        obj.type
+                    }
                     .thenComparing { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
-                        if (message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) message.record.stream
+                        if (
+                            message.type ==
+                                io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD
+                        )
+                            message.record.stream
                         else message.toString()
-                    })
+                    }
+                )
                 .collect(Collectors.toList())
 
-        val config = config
+        val config = getConfig()
         runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false)
     }
 
-    /**
-     * Verify that the integration overwrites the first sync with the second sync.
-     */
+    /** Verify that the integration overwrites the first sync with the second sync. */
     @Test
     @Throws(Exception::class)
     fun testSecondSync() {
@@ -441,97 +501,159 @@ abstract class DestinationAcceptanceTest {
         }
 
         val catalog =
-            Jsons.deserialize(
-                MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)),
-                AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-
-        val firstSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(
-            DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion)).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-        val config = config
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+
+        val firstSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        getProtocolVersion()
+                    )
+                )
+                .lines()
+                .map {
+                    Jsons.deserialize(
+                        it,
+                        io.airbyte.protocol.models.v0.AirbyteMessage::class.java
+                    )
+                }
+                .toList()
+        val config = getConfig()
         runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false)
 
         // We need to make sure that other streams\tables\files in the same location will not be
         // affected\deleted\overridden by our activities during first, second or any future sync.
-        // So let's create a dummy data that will be checked after all sync. It should remain the same
+        // So let's create a dummy data that will be checked after all sync. It should remain the
+        // same
         val dummyCatalog =
-            Jsons.deserialize(
-                MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)),
-                AirbyteCatalog::class.java)
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
         dummyCatalog.streams[0].name = DUMMY_CATALOG_NAME
-        val configuredDummyCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            dummyCatalog)
+        val configuredDummyCatalog = CatalogHelpers.toDefaultConfiguredCatalog(dummyCatalog)
         // update messages to set new dummy stream name
-        firstSyncMessages.stream().filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message.record != null }
-            .forEach { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message.record.stream = DUMMY_CATALOG_NAME }
+        firstSyncMessages
+            .stream()
+            .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
+                message.record != null
+            }
+            .forEach { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
+                message.record.stream = DUMMY_CATALOG_NAME
+            }
         // sync dummy data
         runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredDummyCatalog, false)
 
         // Run second sync
-        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = Lists.newArrayList(
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
-                .withRecord(AirbyteRecordMessage()
-                    .withStream(catalog.streams[0].name)
-                    .withEmittedAt(Instant.now().toEpochMilli())
-                    .withData(Jsons.jsonNode(ImmutableMap.builder<Any, Any>()
+        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            Lists.newArrayList(
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(catalog.streams[0].name)
+                            .withEmittedAt(Instant.now().toEpochMilli())
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
                                         .put("id", 1)
                                         .put("currency", "USD")
-                        .put("date", "2020-03-31T00:00:00Z") // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which
+                                        .put(
+                                            "date",
+                                            "2020-03-31T00:00:00Z"
+                                        ) // TODO(sherifnada) hack: write decimals with sigfigs
+                                        // because Snowflake stores 10.1 as "10" which
                                         // fails destination tests
                                         .put("HKD", 10.1)
                                         .put("NZD", 700.1)
-                        .build()))),
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
-                .withState(AirbyteStateMessage().withData(
-                    Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))))
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))
+                    )
+            )
         runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false)
 
         val defaultSchema = getDefaultSchema(config)
         retrieveRawRecordsAndAssertSameMessages(catalog, secondSyncMessages, defaultSchema)
 
-        // verify that other streams in the same location were not affected. If something fails here,
+        // verify that other streams in the same location were not affected. If something fails
+        // here,
         // then this need to be fixed in connectors logic to override only required streams
         retrieveRawRecordsAndAssertSameMessages(dummyCatalog, firstSyncMessages, defaultSchema)
     }
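Review note: the KDoc on `testSyncWithLargeRecordBatch` above encodes a concrete constraint worth spelling out: SQL Server rejects a single statement with more than 2100 bound parameters, so a multi-row insert into a three-column raw table tops out at 2100 / 3 = 700 rows per statement. The arithmetic as a trivial helper (illustrative, not CDK API):

```kotlin
// SQL Server caps one statement at 2100 bound parameters; each inserted row
// consumes one parameter per column, so the row budget is the integer quotient.
fun maxRowsPerInsert(columnsPerRow: Int, parameterCap: Int = 2100): Int {
    require(columnsPerRow > 0) { "a row must bind at least one parameter" }
    return parameterCap / columnsPerRow
}

fun main() {
    println(maxRowsPerInsert(3)) // 700 -> the batch size the test's comment cites
}
```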
     /**
-     * Tests that we are able to read over special characters properly when processing line breaks in
-     * destinations.
+     * Tests that we are able to read over special characters properly when processing line breaks
+     * in destinations.
     */
    @Test
    @Throws(Exception::class)
    fun testLineBreakCharacters() {
        val catalog =
-            Jsons.deserialize(
-                MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)),
-                AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        val config = config
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val config = getConfig()
 
-        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = Lists.newArrayList(
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
-                .withRecord(AirbyteRecordMessage()
-                    .withStream(catalog.streams[0].name)
-                    .withEmittedAt(Instant.now().toEpochMilli())
-                    .withData(Jsons.jsonNode(ImmutableMap.builder<Any, Any>()
+        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            Lists.newArrayList(
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(catalog.streams[0].name)
+                            .withEmittedAt(Instant.now().toEpochMilli())
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
                                         .put("id", 1)
                                         .put("currency", "USD\u2028")
-                        .put("date", "2020-03-\n31T00:00:00Z\r") // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which
+                                        .put(
+                                            "date",
+                                            "2020-03-\n31T00:00:00Z\r"
+                                        ) // TODO(sherifnada) hack: write decimals with sigfigs
+                                        // because Snowflake stores 10.1 as "10" which
                                        // fails destination tests
                                        .put("HKD", 10.1)
                                        .put("NZD", 700.1)
-                        .build()))),
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
-                .withState(AirbyteStateMessage().withData(
-                    Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))))
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))
+                    )
+            )
 
        runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false)
        val defaultSchema = getDefaultSchema(config)
@@ -544,14 +666,18 @@ abstract class DestinationAcceptanceTest {
         var normalizationRunnerFactorySupportsDestinationImage: Boolean
         try {
             DefaultNormalizationRunner(
-                processFactory,
-                normalizationImageName,
-                normalizationIntegrationType)
+                processFactory,
+                normalizationImageName,
+                normalizationIntegrationType
+            )
             normalizationRunnerFactorySupportsDestinationImage = true
         } catch (e: IllegalStateException) {
             normalizationRunnerFactorySupportsDestinationImage = false
         }
-        Assertions.assertEquals(normalizationFromDefinition(), normalizationRunnerFactorySupportsDestinationImage)
+        Assertions.assertEquals(
+            normalizationFromDefinition(),
+            normalizationRunnerFactorySupportsDestinationImage
+        )
         }
     }
 
@@ -568,49 +694,79 @@ abstract class DestinationAcceptanceTest {
         }
 
         val catalog =
-            Jsons.deserialize(
-                MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)),
-                AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        configuredCatalog.streams.forEach(Consumer { s: ConfiguredAirbyteStream ->
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        configuredCatalog.streams.forEach { s ->
            s.withSyncMode(SyncMode.INCREMENTAL)
            s.withDestinationSyncMode(DestinationSyncMode.APPEND)
-        })
+        }
 
-        val firstSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(
-            DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion)).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-        val config = config
+        val firstSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        getProtocolVersion()
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
+                .toList()
+        val config = getConfig()
        runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false)
-        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = Lists.newArrayList(
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
-                .withRecord(AirbyteRecordMessage()
-                    .withStream(catalog.streams[0].name)
-                    .withEmittedAt(Instant.now().toEpochMilli())
-                    .withData(Jsons.jsonNode(ImmutableMap.builder<Any, Any>()
+        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            Lists.newArrayList(
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(catalog.streams[0].name)
+                            .withEmittedAt(Instant.now().toEpochMilli())
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
                                        .put("id", 1)
                                        .put("currency", "USD")
-                        .put("date", "2020-03-31T00:00:00Z") // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which
+                                        .put(
+                                            "date",
+                                            "2020-03-31T00:00:00Z"
+                                        ) // TODO(sherifnada) hack: write decimals with sigfigs
+                                        // because Snowflake stores 10.1 as "10" which
                                        // fails destination tests
                                        .put("HKD", 10.1)
                                        .put("NZD", 700.1)
-                        .build()))),
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
-                .withState(AirbyteStateMessage().withData(
-                    Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))))
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))
+                    )
+            )
        runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false)
 
-        val expectedMessagesAfterSecondSync: MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> = ArrayList()
+        val expectedMessagesAfterSecondSync:
+            MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            ArrayList()
        expectedMessagesAfterSecondSync.addAll(firstSyncMessages)
        expectedMessagesAfterSecondSync.addAll(secondSyncMessages)
 
        val defaultSchema = getDefaultSchema(config)
-        retrieveRawRecordsAndAssertSameMessages(catalog, expectedMessagesAfterSecondSync,
-            defaultSchema)
+        retrieveRawRecordsAndAssertSameMessages(
+            catalog,
+            expectedMessagesAfterSecondSync,
+            defaultSchema
+        )
     }
 
     @ArgumentsSource(DataArgumentsProvider::class)
@@ -621,45 +777,69 @@ abstract class DestinationAcceptanceTest {
             return
         }
 
-        val catalog = Jsons.deserialize(
-            MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(ProtocolVersion.V0)),
-            AirbyteCatalog::class.java)
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        ProtocolVersion.V0
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
 
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        configuredCatalog.streams.forEach(Consumer { s: ConfiguredAirbyteStream ->
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        configuredCatalog.streams.forEach { s ->
            s.withSyncMode(SyncMode.INCREMENTAL)
            s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP)
            s.withCursorField(emptyList())
            // use composite primary key of various types (string, float)
            s.withPrimaryKey(
-                java.util.List.of(listOf("id"), listOf("currency"), listOf("date"), listOf("NZD"), listOf("USD")))
-        })
+                java.util.List.of(
+                    listOf("id"),
+                    listOf("currency"),
+                    listOf("date"),
+                    listOf("NZD"),
+                    listOf("USD")
+                )
+            )
+        }
 
-        var messages: MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(ProtocolVersion.V0))
-            .lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<MutableList<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
+        var messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        ProtocolVersion.V0
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
 
-        val config = config
+        val config = getConfig()
        runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true)
 
        val defaultSchema = getDefaultSchema(config)
-        var actualMessages = retrieveNormalizedRecords(catalog,
-            defaultSchema)
+        var actualMessages = retrieveNormalizedRecords(catalog, defaultSchema)
        assertSameMessages(messages, actualMessages, true)
 
        // remove one field
        val jsonSchema = configuredCatalog.streams[0].stream.jsonSchema
        (jsonSchema.findValue("properties") as ObjectNode).remove("HKD")
        // insert more messages
-        // NOTE: we re-read the messages because `assertSameMessages` above pruned the emittedAt timestamps.
-        messages = MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(ProtocolVersion.V0)).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<MutableList<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-        messages.add(Jsons.deserialize(
+        // NOTE: we re-read the messages because `assertSameMessages` above pruned the emittedAt
+        // timestamps.
+        messages =
+            MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        ProtocolVersion.V0
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
+        messages.addLast(
+            Jsons.deserialize(
                "{\"type\": \"RECORD\", \"record\": {\"stream\": \"exchange_rate\", \"emitted_at\": 1602637989500, \"data\": { \"id\": 2, \"currency\": \"EUR\", \"date\": \"2020-09-02T00:00:00Z\", \"NZD\": 1.14, \"USD\": 10.16}}}\n",
-            io.airbyte.protocol.models.v0.AirbyteMessage::class.java))
+                io.airbyte.protocol.models.v0.AirbyteMessage::class.java
+            )
+        )
 
        runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true)
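Review note: the column-drop sequence above (remove `HKD` from the stream's JSON schema, sync again, then prune `HKD` from the expected records) rests on a small Jackson idiom: casting the record payload to `ObjectNode` to mutate it. In isolation:

```kotlin
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.node.ObjectNode

// Once a column is dropped from the schema, every expected record must lose
// that field too before comparing against what normalization produced.
fun main() {
    val mapper = ObjectMapper()
    val record = mapper.readTree("""{"id": 1, "currency": "USD", "HKD": 10.1}""") as ObjectNode
    record.remove("HKD") // mirror the schema change in the expected data
    println(record) // {"id":1,"currency":"USD"}
}
```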
@@ -667,12 +847,16 @@ abstract class DestinationAcceptanceTest {
         actualMessages = retrieveNormalizedRecords(catalog, defaultSchema)
 
         // We expect all the of messages to be missing the removed column after normalization.
-        val expectedMessages = messages.stream().map { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
-            if (message.record != null) {
-                (message.record.data as ObjectNode).remove("HKD")
-            }
-            message
-        }.collect(Collectors.toList())
+        val expectedMessages =
+            messages
+                .stream()
+                .map { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
+                    if (message.record != null) {
+                        (message.record.data as ObjectNode).remove("HKD")
+                    }
+                    message
+                }
+                .collect(Collectors.toList())
 
         assertSameMessages(expectedMessages, actualMessages, true)
     }
 
@@ -688,136 +872,174 @@ abstract class DestinationAcceptanceTest {
             return
         }
 
-        val catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename),
-            AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        val messages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(messagesFilename).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
+        val catalog =
+            Jsons.deserialize(
+                MoreResources.readResource(catalogFilename),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        val messages =
+            MoreResources.readResource(messagesFilename).lines().map {
+                Jsons.deserialize(it, AirbyteMessage::class.java)
+            }
 
-        val config = config
+        val config = getConfig()
         runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true)
 
         val defaultSchema = getDefaultSchema(config)
-        val actualMessages = retrieveNormalizedRecords(catalog,
-            defaultSchema)
+        val actualMessages = retrieveNormalizedRecords(catalog, defaultSchema)
         assertSameMessages(messages, actualMessages, true)
     }
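Review note: the dedupe verification below replaces a Java `Collectors.toMap(keyFn, identity, BinaryOperator)` reduction with Kotlin's `groupBy`/`mapValues`/`maxBy`. The idiom in isolation, over a toy record type (names are illustrative):

```kotlin
data class Record(val key: String, val emittedAt: Long)

// Keep only the most recently emitted record per primary key/cursor value.
fun latestPerKey(records: List<Record>): Map<String, Record> =
    records
        .groupBy { it.key } // bucket by primary key/cursor
        .mapValues { (_, group) -> group.maxBy { it.emittedAt } } // keep the newest

fun main() {
    val records = listOf(Record("a", 1), Record("a", 3), Record("b", 2))
    println(latestPerKey(records)) // {a=Record(key=a, emittedAt=3), b=Record(key=b, emittedAt=2)}
}
```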
     /**
-     * Verify that the integration successfully writes records successfully both raw and normalized and
-     * run dedupe transformations.
-     *
+     * Verify that the integration successfully writes records successfully both raw and normalized
+     * and run dedupe transformations.
      *
-     * Although this test assumes append-dedup requires normalization, and almost all our Destinations
-     * do so, this is not necessarily true. This explains [.implementsAppendDedup].
+     * Although this test assumes append-dedup requires normalization, and almost all our
+     * Destinations do so, this is not necessarily true. This explains [.implementsAppendDedup].
     */
    @Test
    @Throws(Exception::class)
    fun testIncrementalDedupeSync() {
        if (!implementsAppendDedup()) {
            LOGGER.info(
-                "Destination's spec.json does not include 'append_dedupe' in its '\"supportedDestinationSyncModes\"'")
+                "Destination's spec.json does not include 'append_dedupe' in its '\"supportedDestinationSyncModes\"'"
+            )
            return
        }
 
        val catalog =
-            Jsons.deserialize(
-                MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)),
-                AirbyteCatalog::class.java)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
-        configuredCatalog.streams.forEach(Consumer { s: ConfiguredAirbyteStream ->
+            Jsons.deserialize(
+                MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(
+                        getProtocolVersion()
+                    )
+                ),
+                AirbyteCatalog::class.java
+            )
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
+        configuredCatalog.streams.forEach { s ->
            s.withSyncMode(SyncMode.INCREMENTAL)
            s.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP)
            s.withCursorField(emptyList())
            // use composite primary key of various types (string, float)
            s.withPrimaryKey(
-                java.util.List.of(listOf("id"), listOf("currency"), listOf("date"), listOf("NZD")))
-        })
-
-        val firstSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = MoreResources.readResource(
-            DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion)).lines()
-            .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) })
-            .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList())
-        val config = config
-        runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, supportsNormalization())
-
-        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> = Lists.newArrayList(
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
-                .withRecord(AirbyteRecordMessage()
-                    .withStream(catalog.streams[0].name)
-                    .withEmittedAt(Instant.now().toEpochMilli())
-                    .withData(Jsons.jsonNode(ImmutableMap.builder<Any, Any>()
+                java.util.List.of(listOf("id"), listOf("currency"), listOf("date"), listOf("NZD"))
+            )
+        }
+
+        val firstSyncMessages =
+            MoreResources.readResource(
+                    DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(
+                        getProtocolVersion()
+                    )
+                )
+                .lines()
+                .map { Jsons.deserialize(it, AirbyteMessage::class.java) }
+        val config = getConfig()
+        runSyncAndVerifyStateOutput(
+            config,
+            firstSyncMessages,
+            configuredCatalog,
+            supportsNormalization()
+        )
+
+        val secondSyncMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            Lists.newArrayList(
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(catalog.streams[0].name)
+                            .withEmittedAt(Instant.now().toEpochMilli())
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
                                        .put("id", 2)
                                        .put("currency", "EUR")
                                        .put("date", "2020-09-01T00:00:00Z")
                                        .put("HKD", 10.5)
                                        .put("NZD", 1.14)
-                        .build()))),
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
-                .withRecord(AirbyteRecordMessage()
-                    .withStream(catalog.streams[0].name)
-                    .withEmittedAt(Instant.now().toEpochMilli() + 100L)
-                    .withData(Jsons.jsonNode(ImmutableMap.builder<Any, Any>()
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD)
+                    .withRecord(
+                        AirbyteRecordMessage()
+                            .withStream(catalog.streams[0].name)
+                            .withEmittedAt(Instant.now().toEpochMilli() + 100L)
+                            .withData(
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
                                        .put("id", 1)
                                        .put("currency", "USD")
                                        .put("date", "2020-09-01T00:00:00Z")
                                        .put("HKD", 5.4)
                                        .put("NZD", 1.14)
-                        .build()))),
-            io.airbyte.protocol.models.v0.AirbyteMessage()
-                .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
-                .withState(AirbyteStateMessage().withData(
-                    Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))))
+                                        .build()
+                                )
+                            )
+                    ),
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
+                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
+                    .withState(
+                        AirbyteStateMessage()
+                            .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2)))
+                    )
+            )
        runSyncAndVerifyStateOutput(config, secondSyncMessages, configuredCatalog, false)
 
-        val expectedMessagesAfterSecondSync: MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> = ArrayList()
+        val expectedMessagesAfterSecondSync:
+            MutableList<io.airbyte.protocol.models.v0.AirbyteMessage> =
+            ArrayList()
        expectedMessagesAfterSecondSync.addAll(firstSyncMessages)
        expectedMessagesAfterSecondSync.addAll(secondSyncMessages)
 
-        val latestMessagesOnly = expectedMessagesAfterSecondSync
-            .stream()
-            .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD && message.record != null }
-            .collect(Collectors.toMap(
-                Function { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
-                    message.record.data["id"].asText() +
-                        message.record.data["currency"].asText() +
-                        message.record.data["date"].asText() +
-                        message.record.data["NZD"].asText()
-                },
-                Function { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message }, // keep only latest emitted record message per primary key/cursor
-                BinaryOperator { a: io.airbyte.protocol.models.v0.AirbyteMessage, b: io.airbyte.protocol.models.v0.AirbyteMessage -> if (a.record.emittedAt > b.record.emittedAt) a else b }))
-        // Filter expectedMessagesAfterSecondSync and keep latest messages only (keep same message order)
-        val expectedMessages = expectedMessagesAfterSecondSync
+        val latestMessagesOnly =
+            expectedMessagesAfterSecondSync
+                .filter { it.type == Type.RECORD && it.record != null }
+                .groupBy {
+                    it.record.data["id"].asText() +
+                        it.record.data["currency"].asText() +
+                        it.record.data["date"].asText() +
+                        it.record.data["NZD"].asText()
+                }
+                .mapValues { it.value.maxBy { it.record.emittedAt } }
+        // Filter expectedMessagesAfterSecondSync and keep latest messages only (keep same message
+        // order)
+        val expectedMessages =
+            expectedMessagesAfterSecondSync
                .stream()
-                .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD && message.record != null }
-                .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage ->
-                    val key = message.record.data["id"].asText() +
-                        message.record.data["currency"].asText() +
-                        message.record.data["date"].asText() +
-                        message.record.data["NZD"].asText()
-                    (message.record.emittedAt
-                        == latestMessagesOnly[key]!!.record.emittedAt)
-                }.collect(Collectors.toList())
+                .filter { it.type == Type.RECORD && it.record != null }
+                .filter {
+                    val key =
+                        it.record.data["id"].asText() +
+                            it.record.data["currency"].asText() +
+                            it.record.data["date"].asText() +
+                            it.record.data["NZD"].asText()
+                    (it.record.emittedAt == latestMessagesOnly[key]!!.record.emittedAt)
+                }
+                .collect(Collectors.toList())
 
        val defaultSchema = getDefaultSchema(config)
        retrieveRawRecordsAndAssertSameMessages(catalog, expectedMessagesAfterSecondSync,
- defaultSchema) + retrieveRawRecordsAndAssertSameMessages( + catalog, + expectedMessagesAfterSecondSync, + defaultSchema + ) if (normalizationFromDefinition()) { - val actualMessages = retrieveNormalizedRecords(catalog, - defaultSchema) + val actualMessages = retrieveNormalizedRecords(catalog, defaultSchema) assertSameMessages(expectedMessages, actualMessages, true) } } protected val maxRecordValueLimit: Int - /** - * @return the max limit length allowed for values in the destination. - */ + /** @return the max limit length allowed for values in the destination. */ get() = 1000000000 @Test @@ -827,25 +1049,35 @@ abstract class DestinationAcceptanceTest { return } - val config = config + val config = getConfig() - // This may throw IllegalStateException "Requesting normalization, but it is not included in the + // This may throw IllegalStateException "Requesting normalization, but it is not included in + // the // normalization mappings" - // We indeed require normalization implementation of the 'transform_config' function for this + // We indeed require normalization implementation of the 'transform_config' function for + // this // destination, - // because we make sure to install required dbt dependency in the normalization docker image in + // because we make sure to install required dbt dependency in the normalization docker image + // in // order to run - // this test successfully and that we are able to convert a destination 'config.json' into a dbt + // this test successfully and that we are able to convert a destination 'config.json' into a + // dbt // 'profiles.yml' // (we don't actually rely on normalization running anything else here though) - val runner = DbtTransformationRunner(processFactory, + val runner = + DbtTransformationRunner( + processFactory, DefaultNormalizationRunner( - processFactory, - normalizationImageName, - normalizationIntegrationType)) + processFactory, + normalizationImageName, + normalizationIntegrationType + ) + ) runner.start() val transformationRoot = Files.createDirectories(jobRoot!!.resolve("transform")) - val dbtConfig = OperatorDbt() // Forked from https://github.com/dbt-labs/jaffle_shop because they made a change that would have + val dbtConfig = + OperatorDbt() // Forked from https://github.com/dbt-labs/jaffle_shop because they made a + // change that would have // required a dbt version upgrade // https://github.com/dbt-labs/jaffle_shop/commit/b1680f3278437c081c735b7ea71c2ff9707bc75f#diff-27386df54b2629c1191d8342d3725ed8678413cfa13b5556f59d69d33fae5425R20 // We're actually two commits upstream of that, because the previous commit @@ -858,7 +1090,8 @@ abstract class DestinationAcceptanceTest { // // jaffle_shop is a fictional ecommerce store maintained by fishtownanalytics/dbt. // - // This dbt project transforms raw data from an app database into a customers and orders model ready + // This dbt project transforms raw data from an app database into a customers and orders + // model ready // for analytics. // The repo is a self-contained playground dbt project, useful for testing out scripts, and // communicating some of the core dbt concepts: @@ -873,16 +1106,19 @@ abstract class DestinationAcceptanceTest { if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) { throw TestHarnessException("dbt deps Failed.") } - // 3. It contains seeds that includes some (fake) raw data from a fictional app as CSVs data sets. + // 3. 
It contains seeds that include some (fake) raw data from a fictional app as CSV data
+        // sets.
        // This materializes the CSVs as tables in your target schema.
-        // Note that a typical dbt project does not require this step since dbt assumes your raw data is
+        // Note that a typical dbt project does not require this step since dbt assumes your raw
+        // data is
        // already in your warehouse.
        dbtConfig.withDbtArguments("seed")
        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
            throw TestHarnessException("dbt seed Failed.")
        }
        // 4. Run the models:
-        // Note: If this steps fails, it might mean that you need to make small changes to the SQL in the
+        // Note: If this step fails, it might mean that you need to make small changes to the SQL
+        // in the
        // models folder to adjust for the flavor of SQL of your target database.
        dbtConfig.withDbtArguments("run")
        if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
@@ -896,7 +1132,8 @@ abstract class DestinationAcceptanceTest {
        // 6. Generate dbt documentation for the project:
        // This step is commented out because it takes a long time, but is not vital for Airbyte
        // dbtConfig.withDbtArguments("docs generate");
-        // if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig)) {
+        // if (!runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig))
+        // {
        // throw new WorkerException("dbt docs generate Failed.");
        // }
        runner.close()
@@ -906,22 +1143,29 @@ abstract class DestinationAcceptanceTest {
    @Throws(Exception::class)
    fun testCustomDbtTransformationsFailure() {
        if (!normalizationFromDefinition() || !dbtFromDefinition()) {
-            // we require normalization implementation for this destination, because we make sure to install
-            // required dbt dependency in the normalization docker image in order to run this test successfully
+            // we require a normalization implementation for this destination, because we make
+            // sure to install
+            // the required dbt dependency in the normalization docker image in order to run this
+            // test successfully
            // (we don't actually rely on normalization running anything here though)
            return
        }
-        val config = config
+        val config = getConfig()

-        val runner = DbtTransformationRunner(processFactory,
+        val runner =
+            DbtTransformationRunner(
+                processFactory,
                DefaultNormalizationRunner(
-                processFactory,
-                normalizationImageName,
-                normalizationIntegrationType))
+                    processFactory,
+                    normalizationImageName,
+                    normalizationIntegrationType
+                )
+            )
        runner.start()
        val transformationRoot = Files.createDirectories(jobRoot!!.resolve("transform"))
-        val dbtConfig = OperatorDbt()
+        val dbtConfig =
+            OperatorDbt()
                .withGitRepoUrl("https://github.com/fishtown-analytics/dbt-learn-demo.git")
                .withGitRepoBranch("main")
                .withDockerImage("fishtownanalytics/dbt:0.19.1")
@@ -931,13 +1175,13 @@ abstract class DestinationAcceptanceTest {
        }
        dbtConfig.withDbtArguments("test")
-        Assertions.assertFalse(runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig),
-            "dbt test should fail, as we haven't run dbt run on this project yet")
+        Assertions.assertFalse(
+            runner.transform(JOB_ID, JOB_ATTEMPT, transformationRoot, config, null, dbtConfig),
+            "dbt test should fail, as we haven't run dbt run on this project yet"
+        )
    }

-    /**
-     * Verify the destination uses the namespace field if it is set.
-     */
+    /** Verify the destination uses the namespace field if it is set.
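+     *
+     * Concretely: the test stamps a generated namespace onto every stream in the catalog, runs a
+     * sync, and asserts that the raw records are then retrievable under that namespace.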
*/ @Test @Throws(Exception::class) fun testSyncUsesAirbyteStreamNamespaceIfNotNull() { @@ -947,33 +1191,38 @@ abstract class DestinationAcceptanceTest { // TODO(davin): make these tests part of the catalog file. val catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)), - AirbyteCatalog::class.java) + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) // A unique namespace is required to avoid test isolation problems. val namespace = TestingNamespaces.generate("source_namespace") TEST_SCHEMAS!!.add(namespace) catalog.streams.forEach(Consumer { stream: AirbyteStream -> stream.namespace = namespace }) - val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages: List = MoreResources.readResource( - DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion)).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect, Any>(Collectors.toList()) - val messagesWithNewNamespace = getRecordMessagesWithNewNamespace( - messages, namespace) + val messages = + MoreResources.readResource( + DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + val messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespace) - val config = config + val config = getConfig() val defaultSchema = getDefaultSchema(config) runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false) retrieveRawRecordsAndAssertSameMessages(catalog, messagesWithNewNamespace, defaultSchema) } - /** - * Verify a destination is able to write tables with the same name to different namespaces. - */ + /** Verify a destination is able to write tables with the same name to different namespaces. */ @Test @Throws(Exception::class) fun testSyncWriteSameTableNameDifferentNamespace() { @@ -983,9 +1232,14 @@ abstract class DestinationAcceptanceTest { // TODO(davin): make these tests part of the catalog file. 
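+        // The catalog's streams are cloned below into a second generated namespace, so the same
+        // table name is written to two schemas and both copies must be readable independently.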
val catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)), - AirbyteCatalog::class.java) + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) val namespace1 = TestingNamespaces.generate("source_namespace") TEST_SCHEMAS!!.add(namespace1) catalog.streams.forEach(Consumer { stream: AirbyteStream -> stream.namespace = namespace1 }) @@ -995,102 +1249,127 @@ abstract class DestinationAcceptanceTest { TEST_SCHEMAS!!.add(namespace2) val mapper = MoreMappers.initMapper() for (stream in catalog.streams) { - val clonedStream = mapper.readValue(mapper.writeValueAsString(stream), - AirbyteStream::class.java) + val clonedStream = + mapper.readValue(mapper.writeValueAsString(stream), AirbyteStream::class.java) clonedStream.namespace = namespace2 diffNamespaceStreams.add(clonedStream) } catalog.streams.addAll(diffNamespaceStreams) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messageFile: String = DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion) - val ns1Messages: List = MoreResources.readResource(messageFile).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect, Any>(Collectors.toList()) + val messageFile: String = + DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(getProtocolVersion()) + val ns1Messages = + MoreResources.readResource(messageFile).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } val ns1MessagesAtNamespace1 = getRecordMessagesWithNewNamespace(ns1Messages, namespace1) - val ns2Messages: List = MoreResources.readResource(messageFile).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect, Any>(Collectors.toList()) + val ns2Messages: List = + MoreResources.readResource(messageFile).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } val ns2MessagesAtNamespace2 = getRecordMessagesWithNewNamespace(ns2Messages, namespace2) val allMessages = ArrayList(ns1MessagesAtNamespace1) allMessages.addAll(ns2MessagesAtNamespace2) - val config = config + val config = getConfig() val defaultSchema = getDefaultSchema(config) runSyncAndVerifyStateOutput(config, allMessages, configuredCatalog, false) retrieveRawRecordsAndAssertSameMessages(catalog, allMessages, defaultSchema) } /** - * The goal of this test is to verify the expected conversions of a namespace as it appears in the - * catalog to how it appears in the destination. Each database has its own rules, so this test runs - * through several "edge" case sorts of names and checks the behavior. + * The goal of this test is to verify the expected conversions of a namespace as it appears in + * the catalog to how it appears in the destination. Each database has its own rules, so this + * test runs through several "edge" case sorts of names and checks the behavior. * - * @param testCaseId - the id of each test case in namespace_test_cases.json so that we can handle - * an individual case specially for a specific database. 
- * @param namespaceInCatalog - namespace as it would appear in the catalog - * @param namespaceInDst - namespace as we would expect it to appear in the destination (this may be - * overridden for different databases). - * @throws Exception - broad catch of exception to hydrate log information with additional test case - * context. + * @param testCaseId + * - the id of each test case in namespace_test_cases.json so that we can handle an individual + * case specially for a specific database. + * @param namespaceInCatalog + * - namespace as it would appear in the catalog + * @param namespaceInDst + * - namespace as we would expect it to appear in the destination (this may be overridden for + * different databases). + * @throws Exception + * - broad catch of exception to hydrate log information with additional test case context. */ @ParameterizedTest @ArgumentsSource(NamespaceTestCaseProvider::class) @Throws(Exception::class) - fun testNamespaces(testCaseId: String?, - namespaceInCatalog: String?, - namespaceInDst: String?) { - val nameTransformer = nameTransformer + fun testNamespaces(testCaseId: String?, namespaceInCatalog: String, namespaceInDst: String?) { + val nameTransformer = getNameTransformer() nameTransformer.ifPresent { namingConventionTransformer: NamingConventionTransformer -> - assertNamespaceNormalization(testCaseId, - namespaceInDst, - namingConventionTransformer.getNamespace(namespaceInCatalog!!)) + assertNamespaceNormalization( + testCaseId, + namespaceInDst, + namingConventionTransformer.getNamespace(namespaceInCatalog!!) + ) } if (!implementsNamespaces() || !supportNamespaceTest()) { return } - val catalog = Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.Companion.NAMESPACE_CONFIG.getCatalogFileVersion(protocolVersion)), - AirbyteCatalog::class.java) - catalog.streams.forEach(Consumer { stream: AirbyteStream -> stream.namespace = namespaceInCatalog }) - val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog) + val catalog = + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.NAMESPACE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + catalog.streams.forEach( + Consumer { stream: AirbyteStream -> stream.namespace = namespaceInCatalog } + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages: List = MoreResources.readResource( - DataArgumentsProvider.Companion.NAMESPACE_CONFIG.getMessageFileVersion(protocolVersion)).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect, Any>(Collectors.toList()) - val messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespaceInCatalog) + val messages = + MoreResources.readResource( + DataArgumentsProvider.NAMESPACE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + val messagesWithNewNamespace = + getRecordMessagesWithNewNamespace(messages, namespaceInCatalog) - val config = config + val config = getConfig() try { runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false) // Add to the list of schemas to clean up. 
TEST_SCHEMAS!!.add(namespaceInCatalog) } catch (e: Exception) { - throw IOException(String.format( + throw IOException( + String.format( "[Test Case %s] Destination failed to sync data to namespace %s, see \"namespace_test_cases.json for details\"", - testCaseId, namespaceInCatalog), e) + testCaseId, + namespaceInCatalog + ), + e + ) } } /** - * In order to launch a source on Kubernetes in a pod, we need to be able to wrap the entrypoint. - * The source connector must specify its entrypoint in the AIRBYTE_ENTRYPOINT variable. This test - * ensures that the entrypoint environment variable is set. + * In order to launch a source on Kubernetes in a pod, we need to be able to wrap the + * entrypoint. The source connector must specify its entrypoint in the AIRBYTE_ENTRYPOINT + * variable. This test ensures that the entrypoint environment variable is set. */ @Test @Throws(Exception::class) fun testEntrypointEnvVar() { - val entrypoint = EntrypointEnvChecker.getEntrypointEnvVariable( + val entrypoint = + EntrypointEnvChecker.getEntrypointEnvVariable( processFactory, JOB_ID, JOB_ATTEMPT, jobRoot, - imageName) + imageName + ) Assertions.assertNotNull(entrypoint) Assertions.assertFalse(entrypoint.isBlank()) @@ -1111,28 +1390,42 @@ abstract class DestinationAcceptanceTest { } val catalog = - Jsons.deserialize( - MoreResources.readResource(DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)), - AirbyteCatalog::class.java) - val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( - catalog) - - val firstSyncMessages: List = MoreResources.readResource( - DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion)).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect, Any>(Collectors.toList()) - val config = config + Jsons.deserialize( + MoreResources.readResource( + DataArgumentsProvider.Companion.EXCHANGE_RATE_CONFIG.getCatalogFileVersion( + getProtocolVersion() + ) + ), + AirbyteCatalog::class.java + ) + val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) + + val firstSyncMessages = + MoreResources.readResource( + DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion( + getProtocolVersion() + ) + ) + .lines() + .map { Jsons.deserialize(it, AirbyteMessage::class.java) } + val config = getConfig() runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false) val stream = catalog.streams[0] // Run second sync with new fields on the message - val secondSyncMessagesWithNewFields: MutableList = Lists.newArrayList( - io.airbyte.protocol.models.v0.AirbyteMessage() - .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) - .withRecord(AirbyteRecordMessage() - .withStream(stream.name) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.builder() + val secondSyncMessagesWithNewFields: + MutableList = + Lists.newArrayList( + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(Type.RECORD) + .withRecord( + AirbyteRecordMessage() + .withStream(stream.name) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData( + Jsons.jsonNode( + ImmutableMap.builder() .put("id", 1) .put("currency", "USD") .put("date", "2020-03-31T00:00:00Z") @@ -1140,73 +1433,130 @@ abstract class DestinationAcceptanceTest { .put("newFieldNumber", 3) .put("HKD", 10.1) .put("NZD", 700.1) - .build()))), - io.airbyte.protocol.models.v0.AirbyteMessage() - 
.withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) - .withState(AirbyteStateMessage().withData( - Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))))) + .build() + ) + ) + ), + io.airbyte.protocol.models.v0 + .AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of("checkpoint", 2))) + ) + ) // Run sync and verify that all message were written without failing - runSyncAndVerifyStateOutput(config, secondSyncMessagesWithNewFields, configuredCatalog, false) - val destinationOutput = retrieveRecords(testEnv, stream.name, - getDefaultSchema(config), stream.jsonSchema) + runSyncAndVerifyStateOutput( + config, + secondSyncMessagesWithNewFields, + configuredCatalog, + false + ) + val destinationOutput = + retrieveRecords(testEnv, stream.name, getDefaultSchema(config), stream.jsonSchema) // Remove state message - secondSyncMessagesWithNewFields.removeIf { airbyteMessage: io.airbyte.protocol.models.v0.AirbyteMessage -> airbyteMessage.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE } + secondSyncMessagesWithNewFields.removeIf { + airbyteMessage: io.airbyte.protocol.models.v0.AirbyteMessage -> + airbyteMessage.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE + } Assertions.assertEquals(secondSyncMessagesWithNewFields.size, destinationOutput.size) } - /** - * Whether the destination should be tested against different namespaces. - */ + /** Whether the destination should be tested against different namespaces. */ protected fun supportNamespaceTest(): Boolean { return false } - protected open val nameTransformer: Optional - /** - * Set up the name transformer used by a destination to test it against a variety of namespaces. - */ - get() = Optional.empty() + /** + * Set up the name transformer used by a destination to test it against a variety of namespaces. + */ + protected open fun getNameTransformer(): Optional = + Optional.empty() /** - * Override this method if the normalized namespace is different from the default one. E.g. BigQuery - * does allow a name starting with a number. So it should change the expected normalized namespace - * when testCaseId = "S3A-1". Find the testCaseId in "namespace_test_cases.json". + * Override this method if the normalized namespace is different from the default one. E.g. + * BigQuery does allow a name starting with a number. So it should change the expected + * normalized namespace when testCaseId = "S3A-1". Find the testCaseId in + * "namespace_test_cases.json". */ - protected fun assertNamespaceNormalization(testCaseId: String?, - expectedNormalizedNamespace: String?, - actualNormalizedNamespace: String?) { - Assertions.assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, - String.format( - "Test case %s failed; if this is expected, please override assertNamespaceNormalization", - testCaseId)) + protected fun assertNamespaceNormalization( + testCaseId: String?, + expectedNormalizedNamespace: String?, + actualNormalizedNamespace: String? 
+ ) { + Assertions.assertEquals( + expectedNormalizedNamespace, + actualNormalizedNamespace, + String.format( + "Test case %s failed; if this is expected, please override assertNamespaceNormalization", + testCaseId + ) + ) } @Throws(TestHarnessException::class) private fun runSpec(): ConnectorSpecification { return convertProtocolObject( - DefaultGetSpecTestHarness( - AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, imageName, processFactory, null, null, false, EnvVariableFeatureFlags())) - .run(JobGetSpecConfig().withDockerImage(imageName), jobRoot).spec, - ConnectorSpecification::class.java) + DefaultGetSpecTestHarness( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ) + ) + .run(JobGetSpecConfig().withDockerImage(imageName), jobRoot) + .spec, + ConnectorSpecification::class.java + ) } @Throws(TestHarnessException::class) protected fun runCheck(config: JsonNode?): StandardCheckConnectionOutput { return DefaultCheckConnectionTestHarness( - AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, imageName, processFactory, null, null, false, EnvVariableFeatureFlags()), - mConnectorConfigUpdater) - .run(StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) - .checkConnection + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ), + mConnectorConfigUpdater + ) + .run(StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) + .checkConnection } protected fun runCheckWithCatchedException( - config: JsonNode?): StandardCheckConnectionOutput.Status { + config: JsonNode? + ): StandardCheckConnectionOutput.Status { try { - val standardCheckConnectionOutput = DefaultCheckConnectionTestHarness( - AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, imageName, processFactory, null, null, false, EnvVariableFeatureFlags()), - mConnectorConfigUpdater) - .run(StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot) + val standardCheckConnectionOutput = + DefaultCheckConnectionTestHarness( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ), + mConnectorConfigUpdater + ) + .run( + StandardCheckConnectionInput().withConnectionConfiguration(config), + jobRoot + ) .checkConnection return standardCheckConnectionOutput.status } catch (e: Exception) { @@ -1216,30 +1566,49 @@ abstract class DestinationAcceptanceTest { } protected val destination: AirbyteDestination - get() = DefaultAirbyteDestination( - AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, imageName, processFactory, null, null, false, EnvVariableFeatureFlags())) + get() = + DefaultAirbyteDestination( + AirbyteIntegrationLauncher( + JOB_ID, + JOB_ATTEMPT, + imageName, + processFactory, + null, + null, + false, + EnvVariableFeatureFlags() + ) + ) @Throws(Exception::class) - protected fun runSyncAndVerifyStateOutput(config: JsonNode, - messages: List, - catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, - runNormalization: Boolean) { - val destinationOutput = runSync(config, messages, catalog, - runNormalization) - - val expectedStateMessage = reversed(messages) + protected fun runSyncAndVerifyStateOutput( + config: JsonNode, + messages: List, + catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + runNormalization: Boolean + ) { + val destinationOutput = runSync(config, messages, catalog, runNormalization) + + 
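+        // The check below compares last STATE messages: the final STATE the test sent must be
+        // echoed back as the destination's final STATE (destination stats are stripped before
+        // comparing).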
val expectedStateMessage = + reversed(messages) .stream() - .filter(Predicate { m: io.airbyte.protocol.models.v0.AirbyteMessage -> m.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE }) + .filter { m: io.airbyte.protocol.models.v0.AirbyteMessage -> + m.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE + } .findFirst() .orElseThrow { IllegalArgumentException( - "All message sets used for testing should include a state record") + "All message sets used for testing should include a state record" + ) }!! Collections.reverse(destinationOutput) - val actualStateMessage = destinationOutput + val actualStateMessage = + destinationOutput .stream() - .filter { m: io.airbyte.protocol.models.v0.AirbyteMessage? -> m!!.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE } + .filter { m: io.airbyte.protocol.models.v0.AirbyteMessage? -> + m!!.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE + } .findFirst() .map { msg: io.airbyte.protocol.models.v0.AirbyteMessage? -> // Modify state message to remove destination stats. @@ -1258,25 +1627,49 @@ abstract class DestinationAcceptanceTest { @Throws(Exception::class) private fun runSync( - config: JsonNode, - messages: List, - catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, - runNormalization: Boolean): List { - val destinationConfig = WorkerDestinationConfig() + config: JsonNode, + messages: List, + catalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + runNormalization: Boolean + ): List { + val destinationConfig = + WorkerDestinationConfig() .withConnectionId(UUID.randomUUID()) - .withCatalog(convertProtocolObject(catalog, ConfiguredAirbyteCatalog::class.java)) + .withCatalog( + convertProtocolObject( + catalog, + io.airbyte.protocol.models.ConfiguredAirbyteCatalog::class.java + ) + ) .withDestinationConnectionConfiguration(config) val destination = destination - destination.start(destinationConfig, jobRoot, inDestinationNormalizationFlags(runNormalization)) + destination.start( + destinationConfig, + jobRoot, + inDestinationNormalizationFlags(runNormalization) + ) messages.forEach( - Consumer { message: io.airbyte.protocol.models.v0.AirbyteMessage -> Exceptions.toRuntime { destination.accept(convertProtocolObject(message, AirbyteMessage::class.java)) } }) + Consumer { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + Exceptions.toRuntime { + destination.accept( + convertProtocolObject( + message, + io.airbyte.protocol.models.AirbyteMessage::class.java + ) + ) + } + } + ) destination.notifyEndOfInput() - val destinationOutput: MutableList = ArrayList() + val destinationOutput: MutableList = + ArrayList() while (!destination.isFinished) { - destination.attemptRead().ifPresent { m: AirbyteMessage -> destinationOutput.add(convertProtocolObject(m, io.airbyte.protocol.models.v0.AirbyteMessage::class.java)) } + destination.attemptRead().ifPresent { m: io.airbyte.protocol.models.AirbyteMessage -> + destinationOutput.add(convertProtocolObject(m, AirbyteMessage::class.java)) + } } destination.close() @@ -1285,15 +1678,24 @@ abstract class DestinationAcceptanceTest { return destinationOutput } - val runner: NormalizationRunner = DefaultNormalizationRunner( + val runner: NormalizationRunner = + DefaultNormalizationRunner( processFactory, normalizationImageName, - normalizationIntegrationType) + normalizationIntegrationType + ) runner.start() val normalizationRoot = Files.createDirectories(jobRoot!!.resolve("normalize")) - if (!runner.normalize(JOB_ID, JOB_ATTEMPT, 
normalizationRoot, - destinationConfig.destinationConnectionConfiguration, - destinationConfig.catalog, null)) { + if ( + !runner.normalize( + JOB_ID, + JOB_ATTEMPT, + normalizationRoot, + destinationConfig.destinationConnectionConfiguration, + destinationConfig.catalog, + null + ) + ) { throw TestHarnessException("Normalization Failed.") } runner.close() @@ -1301,19 +1703,23 @@ abstract class DestinationAcceptanceTest { } @Throws(Exception::class) - protected fun retrieveRawRecordsAndAssertSameMessages(catalog: AirbyteCatalog, - messages: List, - defaultSchema: String?) { + protected fun retrieveRawRecordsAndAssertSameMessages( + catalog: AirbyteCatalog, + messages: List, + defaultSchema: String? + ) { val actualMessages: MutableList = ArrayList() for (stream in catalog.streams) { val streamName = stream.name val schema = if (stream.namespace != null) stream.namespace else defaultSchema!! - val msgList = retrieveRecords(testEnv, streamName, schema, - stream.jsonSchema) + val msgList = + retrieveRecords(testEnv, streamName, schema, stream.jsonSchema) .stream() .map { data: JsonNode? -> - AirbyteRecordMessage().withStream(streamName).withNamespace(schema) - .withData(data) + AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(schema) + .withData(data) } .toList() actualMessages.addAll(msgList) @@ -1323,37 +1729,54 @@ abstract class DestinationAcceptanceTest { } // ignores emitted at. - protected fun assertSameMessages(expected: List, - actual: List, - pruneAirbyteInternalFields: Boolean) { - val expectedProcessed = expected.stream() - .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD } + protected fun assertSameMessages( + expected: List, + actual: List, + pruneAirbyteInternalFields: Boolean + ) { + val expectedProcessed = + expected + .stream() + .filter { message: io.airbyte.protocol.models.v0.AirbyteMessage -> + message.type == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD + } .map { obj: io.airbyte.protocol.models.v0.AirbyteMessage -> obj.record } .peek { recordMessage: AirbyteRecordMessage -> recordMessage.emittedAt = null } - .map { recordMessage: AirbyteRecordMessage -> if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage } + .map { recordMessage: AirbyteRecordMessage -> + if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage + } .map { obj: AirbyteRecordMessage -> obj.data } .collect(Collectors.toList()) - val actualProcessed = actual.stream() - .map { recordMessage: AirbyteRecordMessage -> if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage } + val actualProcessed = + actual + .stream() + .map { recordMessage: AirbyteRecordMessage -> + if (pruneAirbyteInternalFields) safePrune(recordMessage) else recordMessage + } .map { obj: AirbyteRecordMessage -> obj.data } .collect(Collectors.toList()) - testDataComparator.assertSameData(expectedProcessed, actualProcessed) + _testDataComparator.assertSameData(expectedProcessed, actualProcessed) } @Throws(Exception::class) - protected fun retrieveNormalizedRecords(catalog: AirbyteCatalog, - defaultSchema: String?): List { + protected fun retrieveNormalizedRecords( + catalog: AirbyteCatalog, + defaultSchema: String? 
+    ): List<AirbyteRecordMessage> {
        val actualMessages: MutableList<AirbyteRecordMessage> = ArrayList()
        for (stream in catalog.streams) {
            val streamName = stream.name

-            val msgList = retrieveNormalizedRecords(testEnv, streamName,
-                    defaultSchema)
+            val msgList =
+                retrieveNormalizedRecords(testEnv, streamName, defaultSchema)
                    .stream()
-                    .map { data: JsonNode? -> AirbyteRecordMessage().withStream(streamName).withData(data) }.toList()
+                    .map { data: JsonNode? ->
+                        AirbyteRecordMessage().withStream(streamName).withData(data)
+                    }
+                    .toList()
            actualMessages.addAll(msgList)
        }
        return actualMessages
@@ -1361,20 +1784,19 @@ abstract class DestinationAcceptanceTest {
    class TestDestinationEnv(val localRoot: Path?) {
        override fun toString(): String {
-            return "TestDestinationEnv{" +
-                    "localRoot=" + localRoot +
-                    '}'
+            return "TestDestinationEnv{" + "localRoot=" + localRoot + '}'
        }
    }

    /**
-     * This test MUST be disabled by default, but you may uncomment it and use when need to reproduce a
-     * performance issue for destination. This test helps you to emulate lot's of stream and messages in
-     * each simply changing the "streamsSize" args to set a number of tables\streams and the
-     * "messagesNumber" to a messages number that would be written in each stream. !!! Do NOT forget to
-     * manually remove all generated objects !!! Hint: To check the destination container output run
-     * "docker ps" command in console to find the container's id. Then run "docker container attach
-     * your_containers_id" (ex. docker container attach 18cc929f44c8) to see the container's output
+     * This test MUST be disabled by default, but you may uncomment it and use it when you need to
+     * reproduce a performance issue for a destination. It helps you emulate lots of streams and
+     * messages per stream by simply changing the "streamsSize" arg to set the number of
+     * tables\streams and "messagesNumber" to the number of messages to write to each stream. !!!
+     * Do NOT forget to manually remove all generated objects !!! Hint: to check the destination
+     * container output, run "docker ps" in a console to find the container's id, then run
+     * "docker container attach your_containers_id" (ex.
docker container attach 18cc929f44c8) to + * see the container's output */ @Test @Disabled @@ -1391,20 +1813,27 @@ abstract class DestinationAcceptanceTest { // generate schema\catalogs val configuredAirbyteStreams: MutableList = ArrayList() for (i in 0 until streamsSize) { - configuredAirbyteStreams - .add(CatalogHelpers.createAirbyteStream(USERS_STREAM_NAME + i, - Field.of(NAME, JsonSchemaType.STRING), - Field - .of(ID, JsonSchemaType.STRING))) + configuredAirbyteStreams.add( + CatalogHelpers.createAirbyteStream( + USERS_STREAM_NAME + i, + Field.of(NAME, JsonSchemaType.STRING), + Field.of(ID, JsonSchemaType.STRING) + ) + ) } val testCatalog = AirbyteCatalog().withStreams(configuredAirbyteStreams) - val configuredTestCatalog = CatalogHelpers - .toDefaultConfiguredCatalog(testCatalog) + val configuredTestCatalog = CatalogHelpers.toDefaultConfiguredCatalog(testCatalog) - val config = config - val destinationConfig = WorkerDestinationConfig() + val config = getConfig() + val destinationConfig = + WorkerDestinationConfig() .withConnectionId(UUID.randomUUID()) - .withCatalog(convertProtocolObject(configuredTestCatalog, ConfiguredAirbyteCatalog::class.java)) + .withCatalog( + convertProtocolObject( + configuredTestCatalog, + io.airbyte.protocol.models.ConfiguredAirbyteCatalog::class.java + ) + ) .withDestinationConnectionConfiguration(config) val destination = destination @@ -1414,12 +1843,18 @@ abstract class DestinationAcceptanceTest { val currentStreamNumber = AtomicInteger(0) val currentRecordNumberForStream = AtomicInteger(0) - // this is just a current state logger. Useful when running long hours tests to see the progress + // this is just a current state logger. Useful when running long hours tests to see the + // progress val countPrinter = Thread { while (true) { println( - "currentStreamNumber=" + currentStreamNumber + ", currentRecordNumberForStream=" - + currentRecordNumberForStream + ", " + Instant.now()) + "currentStreamNumber=" + + currentStreamNumber + + ", currentRecordNumberForStream=" + + currentRecordNumberForStream + + ", " + + Instant.now() + ) try { Thread.sleep(10000) } catch (e: InterruptedException) { @@ -1435,17 +1870,30 @@ abstract class DestinationAcceptanceTest { // iterate through msm inside a particular stream // Generate messages and put it to stream for (msgCounter in 0 until messagesNumber) { - val msg = io.airbyte.protocol.models.v0.AirbyteMessage() + val msg = + io.airbyte.protocol.models.v0 + .AirbyteMessage() .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) - .withRecord(AirbyteRecordMessage().withStream(USERS_STREAM_NAME + streamCounter) + .withRecord( + AirbyteRecordMessage() + .withStream(USERS_STREAM_NAME + streamCounter) .withData( - Jsons.jsonNode( - ImmutableMap.builder().put(NAME, LOREM_IPSUM) - .put(ID, streamCounter.toString() + "_" + msgCounter) - .build())) - .withEmittedAt(Instant.now().toEpochMilli())) + Jsons.jsonNode( + ImmutableMap.builder() + .put(NAME, LOREM_IPSUM) + .put(ID, streamCounter.toString() + "_" + msgCounter) + .build() + ) + ) + .withEmittedAt(Instant.now().toEpochMilli()) + ) try { - destination.accept(convertProtocolObject(msg, AirbyteMessage::class.java)) + destination.accept( + convertProtocolObject( + msg, + io.airbyte.protocol.models.AirbyteMessage::class.java + ) + ) } catch (e: Exception) { LOGGER.error("Failed to write a RECORD message: $e") throw RuntimeException(e) @@ -1455,13 +1903,27 @@ abstract class DestinationAcceptanceTest { } // send state message here, it's required - val msgState = 
io.airbyte.protocol.models.v0.AirbyteMessage()
+            val msgState =
+                io.airbyte.protocol.models.v0
+                    .AirbyteMessage()
                    .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.STATE)
-                    .withState(AirbyteStateMessage()
+                    .withState(
+                        AirbyteStateMessage()
                            .withData(
-                                Jsons.jsonNode(ImmutableMap.builder<Any, Any>().put("start_date", "2020-09-02").build())))
+                                Jsons.jsonNode(
+                                    ImmutableMap.builder<Any, Any>()
+                                        .put("start_date", "2020-09-02")
+                                        .build()
+                                )
+                            )
+                    )
            try {
-                destination.accept(convertProtocolObject(msgState, AirbyteMessage::class.java))
+                destination.accept(
+                    convertProtocolObject(
+                        msgState,
+                        io.airbyte.protocol.models.AirbyteMessage::class.java
+                    )
+                )
            } catch (e: Exception) {
                LOGGER.error("Failed to write a STATE message: $e")
                throw RuntimeException(e)
            }
@@ -1470,16 +1932,17 @@
            currentStreamNumber.set(streamCounter)
        }

-        LOGGER.info(String.format("Added %s messages to each of %s streams", currentRecordNumberForStream,
-            currentStreamNumber))
+        LOGGER.info(
+            String.format(
+                "Added %s messages to each of %s streams",
+                currentRecordNumberForStream,
+                currentStreamNumber
+            )
+        )
        // Close destination
        destination.notifyEndOfInput()
    }

-    protected open fun getTestDataComparator(): TestDataComparator {
-        return BasicTestDataComparator { identifier: String? -> this.resolveIdentifier(identifier) }
-    }
-
    protected open fun supportBasicDataTypeTest(): Boolean {
        return false
    }
@@ -1496,37 +1959,40 @@ abstract class DestinationAcceptanceTest {
        return false
    }

-    open val protocolVersion: ProtocolVersion
-        /**
-         * The method should be overridden if destination connector support newer protocol version otherwise
-         * [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.V0] is used
-         *
-         *
-         * NOTE: Method should be public in a sake of java reflection
-         *
-         * @return
-         */
-        get() = ProtocolVersion.V0
+    /**
+     * This method should be overridden if the destination connector supports a newer protocol
+     * version; otherwise [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.V0]
+     * is used.
+     *
+     * NOTE: the method should be public for the sake of java reflection.
+     *
+     * @return
+     */
+    open fun getProtocolVersion(): ProtocolVersion = ProtocolVersion.V0

    private fun checkTestCompatibility(
-        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility): Boolean {
-        return testCompatibility.isTestCompatible(supportBasicDataTypeTest(),
-            supportArrayDataTypeTest(), supportObjectDataTypeTest())
+        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility
+    ): Boolean {
+        return testCompatibility.isTestCompatible(
+            supportBasicDataTypeTest(),
+            supportArrayDataTypeTest(),
+            supportObjectDataTypeTest()
+        )
    }

    @ParameterizedTest
    @ArgumentsSource(DataTypeTestArgumentProvider::class)
    @Throws(Exception::class)
-    fun testDataTypeTestWithNormalization(messagesFilename: String?,
-        catalogFilename: String?,
-        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility) {
+    fun testDataTypeTestWithNormalization(
+        messagesFilename: String?,
+        catalogFilename: String?,
+        testCompatibility: DataTypeTestArgumentProvider.TestCompatibility
+    ) {
        if (!checkTestCompatibility(testCompatibility)) {
            return
        }

        val catalog = readCatalogFromFile(catalogFilename)
-        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
-            catalog)
+        val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog)
        val messages = readMessagesFromFile(messagesFilename)

        runAndCheck(catalog, configuredCatalog, messages)
@@ -1537,13 +2003,25 @@ abstract class
DestinationAcceptanceTest { fun testSyncNumberNanDataType() { // NaN/Infinity protocol supports started from V1 version or higher val numericTypesSupport = specialNumericTypesSupportTest - if (protocolVersion == ProtocolVersion.V0 || !numericTypesSupport.isSupportNumberNan()) { + if (getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportNumberNan) { return } - val catalog = readCatalogFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG, protocolVersion)) + val catalog = + readCatalogFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG, + getProtocolVersion() + ) + ) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages = readMessagesFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE, protocolVersion)) - val config = config + val messages = + readMessagesFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE, + getProtocolVersion() + ) + ) + val config = getConfig() val defaultSchema = getDefaultSchema(config) runAndCheck(catalog, configuredCatalog, messages) @@ -1554,13 +2032,25 @@ abstract class DestinationAcceptanceTest { fun testSyncIntegerNanDataType() { // NaN/Infinity protocol supports started from V1 version or higher val numericTypesSupport = specialNumericTypesSupportTest - if (protocolVersion == ProtocolVersion.V0 || !numericTypesSupport.isSupportIntegerNan()) { + if (getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportIntegerNan) { return } - val catalog = readCatalogFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG, protocolVersion)) + val catalog = + readCatalogFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG, + getProtocolVersion() + ) + ) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages = readMessagesFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE, protocolVersion)) - val config = config + val messages = + readMessagesFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.NAN_TYPE_MESSAGE, + getProtocolVersion() + ) + ) + val config = getConfig() val defaultSchema = getDefaultSchema(config) runAndCheck(catalog, configuredCatalog, messages) @@ -1571,13 +2061,27 @@ abstract class DestinationAcceptanceTest { fun testSyncNumberInfinityDataType() { // NaN/Infinity protocol supports started from V1 version or higher val numericTypesSupport = specialNumericTypesSupportTest - if (protocolVersion == ProtocolVersion.V0 || !numericTypesSupport.isSupportNumberInfinity()) { + if ( + getProtocolVersion() == ProtocolVersion.V0 || !numericTypesSupport.supportNumberInfinity + ) { return } - val catalog = readCatalogFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG, protocolVersion)) + val catalog = + readCatalogFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.NUMBER_TYPE_CATALOG, + getProtocolVersion() + ) + ) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages = 
readMessagesFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE, protocolVersion)) - val config = config + val messages = + readMessagesFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE, + getProtocolVersion() + ) + ) + val config = getConfig() val defaultSchema = getDefaultSchema(config) runAndCheck(catalog, configuredCatalog, messages) @@ -1588,20 +2092,39 @@ abstract class DestinationAcceptanceTest { fun testSyncIntegerInfinityDataType() { // NaN/Infinity protocol supports started from V1 version or higher val numericTypesSupport = specialNumericTypesSupportTest - if (protocolVersion == ProtocolVersion.V0 || !numericTypesSupport.isSupportIntegerInfinity()) { + if ( + getProtocolVersion() == ProtocolVersion.V0 || + !numericTypesSupport.supportIntegerInfinity + ) { return } - val catalog = readCatalogFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG, protocolVersion)) + val catalog = + readCatalogFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.INTEGER_TYPE_CATALOG, + getProtocolVersion() + ) + ) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) - val messages = readMessagesFromFile(ArgumentProviderUtil.prefixFileNameByVersion(DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE, protocolVersion)) - val config = config + val messages = + readMessagesFromFile( + ArgumentProviderUtil.prefixFileNameByVersion( + DataTypeTestArgumentProvider.Companion.INFINITY_TYPE_MESSAGE, + getProtocolVersion() + ) + ) + val config = getConfig() val defaultSchema = getDefaultSchema(config) runAndCheck(catalog, configuredCatalog, messages) } @Throws(Exception::class) - private fun runAndCheck(catalog: AirbyteCatalog, configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, messages: List) { + private fun runAndCheck( + catalog: AirbyteCatalog, + configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + messages: List + ) { if (normalizationFromDefinition()) { LOGGER.info("Normalization is supported! 
Run test with normalization.") runAndCheckWithNormalization(messages, configuredCatalog, catalog) @@ -1612,61 +2135,65 @@ abstract class DestinationAcceptanceTest { } @Throws(Exception::class) - private fun runAndCheckWithNormalization(messages: List, - configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, - catalog: AirbyteCatalog) { - val config = config + private fun runAndCheckWithNormalization( + messages: List, + configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + catalog: AirbyteCatalog + ) { + val config = getConfig() runSyncAndVerifyStateOutput(config, messages, configuredCatalog, true) - val actualMessages = retrieveNormalizedRecords(catalog, - getDefaultSchema(config)) + val actualMessages = retrieveNormalizedRecords(catalog, getDefaultSchema(config)) assertSameMessages(messages, actualMessages, true) } @Throws(Exception::class) - private fun runAndCheckWithoutNormalization(messages: List, - configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, - catalog: AirbyteCatalog) { - val config = config + private fun runAndCheckWithoutNormalization( + messages: List, + configuredCatalog: io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog, + catalog: AirbyteCatalog + ) { + val config = getConfig() runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) retrieveRawRecordsAndAssertSameMessages(catalog, messages, getDefaultSchema(config)) } /** - * Can be used in overridden [ getSpecialNumericTypesSupportTest()][.getSpecialNumericTypesSupportTest] method to specify if connector supports Integer/Number NaN - * or Integer/Number Infinity types + * Can be used in overridden [ + * getSpecialNumericTypesSupportTest()][.getSpecialNumericTypesSupportTest] method to specify if + * connector supports Integer/Number NaN or Integer/Number Infinity types */ - @Builder - @Getter - class SpecialNumericTypes { - @Builder.Default - var supportIntegerNan: Boolean = false - - @Builder.Default - var supportNumberNan: Boolean = false - - @Builder.Default - var supportIntegerInfinity: Boolean = false - - @Builder.Default - var supportNumberInfinity: Boolean = false - } + class SpecialNumericTypes( + val supportIntegerNan: Boolean = false, + val supportNumberNan: Boolean = false, + val supportIntegerInfinity: Boolean = false, + val supportNumberInfinity: Boolean = false + ) class NamespaceTestCaseProvider : ArgumentsProvider { @Throws(Exception::class) override fun provideArguments(context: ExtensionContext): Stream { - val testCases = - Jsons.deserialize(MoreResources.readResource(NAMESPACE_TEST_CASES_JSON)) - return MoreIterators.toList(testCases.elements()).stream() - .filter { testCase: JsonNode -> testCase["enabled"].asBoolean() } - .map { testCase: JsonNode -> - val namespaceInCatalog = TestingNamespaces.generate(testCase["namespace"].asText()) - val namespaceInDst = TestingNamespaces.generateFromOriginal(namespaceInCatalog, testCase["namespace"].asText(), testCase["normalized"].asText()) - Arguments.of( - testCase["id"].asText(), // Add uniqueness to namespace to avoid collisions between tests. 
- namespaceInCatalog, - namespaceInDst) - } + val testCases = Jsons.deserialize(MoreResources.readResource(NAMESPACE_TEST_CASES_JSON)) + return MoreIterators.toList(testCases.elements()) + .stream() + .filter { testCase: JsonNode -> testCase["enabled"].asBoolean() } + .map { testCase: JsonNode -> + val namespaceInCatalog = + TestingNamespaces.generate(testCase["namespace"].asText()) + val namespaceInDst = + TestingNamespaces.generateFromOriginal( + namespaceInCatalog, + testCase["namespace"].asText(), + testCase["normalized"].asText() + ) + Arguments.of( + testCase["id"] + .asText(), // Add uniqueness to namespace to avoid collisions between + // tests. + namespaceInCatalog, + namespaceInDst + ) + } } companion object { @@ -1695,19 +2222,20 @@ abstract class DestinationAcceptanceTest { * * @param list to reverse * @param type - * @return new list with elements of original reversed. - */ - fun reversed(list: List?): List { + * @return new list with elements of original reversed. + */ + fun reversed(list: List): List { val reversed = ArrayList(list) Collections.reverse(reversed) return reversed } /** - * Same as [.pruneMutate], except does a defensive copy and returns a new json node - * object instead of mutating in place. + * Same as [.pruneMutate], except does a defensive copy and returns a new json node object + * instead of mutating in place. * - * @param record - record that will be pruned. + * @param record + * - record that will be pruned. * @return pruned json node. */ private fun safePrune(record: AirbyteRecordMessage): AirbyteRecordMessage { @@ -1717,11 +2245,12 @@ abstract class DestinationAcceptanceTest { } /** - * Prune fields that are added internally by airbyte and are not part of the original data. Used so - * that we can compare data that is persisted by an Airbyte worker to the original data. This method - * mutates the provided json in place. + * Prune fields that are added internally by airbyte and are not part of the original data. + * Used so that we can compare data that is persisted by an Airbyte worker to the original + * data. This method mutates the provided json in place. * - * @param json - json that will be pruned. will be mutated in place! + * @param json + * - json that will be pruned. will be mutated in place! */ private fun pruneMutate(json: JsonNode) { for (key in Jsons.keys(json)) { @@ -1734,11 +2263,15 @@ abstract class DestinationAcceptanceTest { // prune the following // - airbyte internal fields // - fields that match what airbyte generates as hash ids - // - null values -- normalization will often return `: null` but in the original data that key - // likely did not exist in the original message. the most consistent thing to do is always remove - // the null fields (this choice does decrease our ability to check that normalization creates + // - null values -- normalization will often return `: null` but in the + // original data that key + // likely did not exist in the original message. 
the most consistent thing to do is + // always remove + // the null fields (this choice does decrease our ability to check that + // normalization creates // columns even if all the values in that column are null) - val airbyteInternalFields = Sets.newHashSet( + val airbyteInternalFields = + Sets.newHashSet( "emitted_at", "ab_id", "normalized_at", @@ -1747,67 +2280,74 @@ abstract class DestinationAcceptanceTest { "NORMALIZED_AT", "HASHID", "unique_key", - "UNIQUE_KEY") - if (airbyteInternalFields.stream() - .anyMatch { internalField: String -> key.lowercase(Locale.getDefault()).contains(internalField.lowercase(Locale.getDefault())) } - || json[key].isNull) { + "UNIQUE_KEY" + ) + if ( + airbyteInternalFields.stream().anyMatch { internalField: String -> + key.lowercase(Locale.getDefault()) + .contains(internalField.lowercase(Locale.getDefault())) + } || json[key].isNull + ) { (json as ObjectNode).remove(key) } } } - private const val LOREM_IPSUM = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque malesuada lacinia aliquet. Nam feugiat mauris vel magna dignissim feugiat. Nam non dapibus sapien, ac mattis purus. Donec mollis libero erat, a rutrum ipsum pretium id. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Integer nec aliquam leo. Aliquam eu dictum augue, a ornare elit.\n" - + "\n" - + "Nulla viverra blandit neque. Nam blandit varius efficitur. Nunc at sapien blandit, malesuada lectus vel, tincidunt orci. Proin blandit metus eget libero facilisis interdum. Aenean luctus scelerisque orci, at scelerisque sem vestibulum in. Nullam ornare massa sed dui efficitur, eget volutpat lectus elementum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Integer elementum mi vitae erat eleifend iaculis. Nullam eget tincidunt est, eget tempor est. Sed risus velit, iaculis vitae est in, volutpat consectetur odio. Aenean ut fringilla elit. Suspendisse non aliquet massa. Curabitur suscipit metus nunc, nec porttitor velit venenatis vel. Fusce vestibulum eleifend diam, lobortis auctor magna.\n" - + "\n" - + "Etiam maximus, mi feugiat pharetra mattis, nulla neque euismod metus, in congue nunc sem nec ligula. Curabitur aliquam, risus id convallis cursus, nunc orci sollicitudin enim, quis scelerisque nibh dui in ipsum. Suspendisse mollis, metus a dapibus scelerisque, sapien nulla pretium ipsum, non finibus sem orci et lectus. Aliquam dictum magna nisi, a consectetur urna euismod nec. In pulvinar facilisis nulla, id mollis libero pulvinar vel. Nam a commodo leo, eu commodo dolor. In hac habitasse platea dictumst. Curabitur auctor purus quis tortor laoreet efficitur. Quisque tincidunt, risus vel rutrum fermentum, libero urna dignissim augue, eget pulvinar nibh ligula ut tortor. Vivamus convallis non risus sed consectetur. Etiam accumsan enim ac nisl suscipit, vel congue lorem volutpat. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce non orci quis lacus rhoncus vestibulum nec ut magna. In varius lectus nec quam posuere finibus. Vivamus quis lectus vitae tortor sollicitudin fermentum.\n" - + "\n" - + "Pellentesque elementum vehicula egestas. Sed volutpat velit arcu, at imperdiet sapien consectetur facilisis. Suspendisse porttitor tincidunt interdum. Morbi gravida faucibus tortor, ut rutrum magna tincidunt a. Morbi eu nisi eget dui finibus hendrerit sit amet in augue. Aenean imperdiet lacus enim, a volutpat nulla placerat at. Suspendisse nibh ipsum, venenatis vel maximus ut, fringilla nec felis. 
Sed risus mi, egestas quis quam ullamcorper, pharetra vestibulum diam.\n" - + "\n" - + "Praesent finibus scelerisque elit, accumsan condimentum risus mattis vitae. Donec tristique hendrerit facilisis. Curabitur metus purus, venenatis non elementum id, finibus eu augue. Quisque posuere rhoncus ligula, et vehicula erat pulvinar at. Pellentesque vel quam vel lectus tincidunt congue quis id sapien. Ut efficitur mauris vitae pretium iaculis. Aliquam consectetur iaculis nisi vitae laoreet. Integer vel odio quis diam mattis tempor eget nec est. Donec iaculis facilisis neque, at dictum magna vestibulum ut. Sed malesuada non nunc ac consequat. Maecenas tempus lectus a nisl congue, ac venenatis diam viverra. Nam ac justo id nulla iaculis lobortis in eu ligula. Vivamus et ligula id sapien efficitur aliquet. Curabitur est justo, tempus vitae mollis quis, tincidunt vitae felis. Vestibulum molestie laoreet justo, nec mollis purus vulputate at.") + private const val LOREM_IPSUM = + ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque malesuada lacinia aliquet. Nam feugiat mauris vel magna dignissim feugiat. Nam non dapibus sapien, ac mattis purus. Donec mollis libero erat, a rutrum ipsum pretium id. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Integer nec aliquam leo. Aliquam eu dictum augue, a ornare elit.\n" + + "\n" + + "Nulla viverra blandit neque. Nam blandit varius efficitur. Nunc at sapien blandit, malesuada lectus vel, tincidunt orci. Proin blandit metus eget libero facilisis interdum. Aenean luctus scelerisque orci, at scelerisque sem vestibulum in. Nullam ornare massa sed dui efficitur, eget volutpat lectus elementum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Integer elementum mi vitae erat eleifend iaculis. Nullam eget tincidunt est, eget tempor est. Sed risus velit, iaculis vitae est in, volutpat consectetur odio. Aenean ut fringilla elit. Suspendisse non aliquet massa. Curabitur suscipit metus nunc, nec porttitor velit venenatis vel. Fusce vestibulum eleifend diam, lobortis auctor magna.\n" + + "\n" + + "Etiam maximus, mi feugiat pharetra mattis, nulla neque euismod metus, in congue nunc sem nec ligula. Curabitur aliquam, risus id convallis cursus, nunc orci sollicitudin enim, quis scelerisque nibh dui in ipsum. Suspendisse mollis, metus a dapibus scelerisque, sapien nulla pretium ipsum, non finibus sem orci et lectus. Aliquam dictum magna nisi, a consectetur urna euismod nec. In pulvinar facilisis nulla, id mollis libero pulvinar vel. Nam a commodo leo, eu commodo dolor. In hac habitasse platea dictumst. Curabitur auctor purus quis tortor laoreet efficitur. Quisque tincidunt, risus vel rutrum fermentum, libero urna dignissim augue, eget pulvinar nibh ligula ut tortor. Vivamus convallis non risus sed consectetur. Etiam accumsan enim ac nisl suscipit, vel congue lorem volutpat. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce non orci quis lacus rhoncus vestibulum nec ut magna. In varius lectus nec quam posuere finibus. Vivamus quis lectus vitae tortor sollicitudin fermentum.\n" + + "\n" + + "Pellentesque elementum vehicula egestas. Sed volutpat velit arcu, at imperdiet sapien consectetur facilisis. Suspendisse porttitor tincidunt interdum. Morbi gravida faucibus tortor, ut rutrum magna tincidunt a. Morbi eu nisi eget dui finibus hendrerit sit amet in augue. Aenean imperdiet lacus enim, a volutpat nulla placerat at. 
Suspendisse nibh ipsum, venenatis vel maximus ut, fringilla nec felis. Sed risus mi, egestas quis quam ullamcorper, pharetra vestibulum diam.\n" + + "\n" + + "Praesent finibus scelerisque elit, accumsan condimentum risus mattis vitae. Donec tristique hendrerit facilisis. Curabitur metus purus, venenatis non elementum id, finibus eu augue. Quisque posuere rhoncus ligula, et vehicula erat pulvinar at. Pellentesque vel quam vel lectus tincidunt congue quis id sapien. Ut efficitur mauris vitae pretium iaculis. Aliquam consectetur iaculis nisi vitae laoreet. Integer vel odio quis diam mattis tempor eget nec est. Donec iaculis facilisis neque, at dictum magna vestibulum ut. Sed malesuada non nunc ac consequat. Maecenas tempus lectus a nisl congue, ac venenatis diam viverra. Nam ac justo id nulla iaculis lobortis in eu ligula. Vivamus et ligula id sapien efficitur aliquet. Curabitur est justo, tempus vitae mollis quis, tincidunt vitae felis. Vestibulum molestie laoreet justo, nec mollis purus vulputate at.") protected val specialNumericTypesSupportTest: SpecialNumericTypes /** - * NaN and Infinity test are not supported by default. Please override this method to specify - * NaN/Infinity types support example: + * NaN and Infinity test are not supported by default. Please override this method to + * specify NaN/Infinity types support example: * *
              *
-             * protected SpecialNumericTypes getSpecialNumericTypesSupportTest() {
-             * return SpecialNumericTypes.builder()
-             * .supportNumberNan(true)
-             * .supportIntegerNan(true)
-             * .build();
-             * }
-            
* + * protected fun getSpecialNumericTypesSupportTest(): SpecialNumericTypes { + * return SpecialNumericTypes(supportNumberNan = true, supportIntegerNan = true) + * } * * * @return SpecialNumericTypes with support flags */ - get() = SpecialNumericTypes.builder().build() + get() = SpecialNumericTypes() @Throws(IOException::class) private fun readCatalogFromFile(catalogFilename: String?): AirbyteCatalog { - return Jsons.deserialize(MoreResources.readResource(catalogFilename), AirbyteCatalog::class.java) + return Jsons.deserialize( + MoreResources.readResource(catalogFilename), + AirbyteCatalog::class.java + ) } @Throws(IOException::class) - private fun readMessagesFromFile(messagesFilename: String?): List<io.airbyte.protocol.models.v0.AirbyteMessage> { - return MoreResources.readResource(messagesFilename).lines() - .map(Function { record: String? -> Jsons.deserialize(record, io.airbyte.protocol.models.v0.AirbyteMessage::class.java) }) - .collect<List<io.airbyte.protocol.models.v0.AirbyteMessage>, Any>(Collectors.toList()) + private fun readMessagesFromFile( + messagesFilename: String? + ): List<io.airbyte.protocol.models.v0.AirbyteMessage> { + return MoreResources.readResource(messagesFilename).lines().map { + Jsons.deserialize(it, AirbyteMessage::class.java) + } } - /** - * Mutate the input airbyte record message namespace. - */ + /** Mutate the input airbyte record message namespace. */ private fun getRecordMessagesWithNewNamespace( - airbyteMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage>, - namespace: String?): List<io.airbyte.protocol.models.v0.AirbyteMessage> { + airbyteMessages: List<io.airbyte.protocol.models.v0.AirbyteMessage>, + namespace: String? + ): List<io.airbyte.protocol.models.v0.AirbyteMessage> { airbyteMessages.forEach( Consumer { message: io.airbyte.protocol.models.v0.AirbyteMessage -> if (message.record != null) { message.record.namespace = namespace + } } - }) + ) return airbyteMessages }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt index f79c3b7d8dd7..bcbe334e834e 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/DestinationAcceptanceTestUtils.kt @@ -9,8 +9,11 @@ import io.airbyte.commons.json.Jsons object DestinationAcceptanceTestUtils { fun putStringIntoJson(stringValue: String?, fieldName: String?, node: ObjectNode) { - if (stringValue != null && (stringValue.startsWith("[") && stringValue.endsWith("]") - || stringValue.startsWith("{") && stringValue.endsWith("}"))) { + if ( + stringValue != null && + (stringValue.startsWith("[") && stringValue.endsWith("]") || + stringValue.startsWith("{") && stringValue.endsWith("}")) + ) { node.set<JsonNode>(fieldName, Jsons.deserialize(stringValue)) } else { node.put(fieldName, stringValue)
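The reformatted condition in putStringIntoJson keeps the same behavior: a value is parsed as embedded JSON only when it is bracket-delimited, and stored as a plain string otherwise. A small usage sketch (illustrative values, plain Jackson):

    import com.fasterxml.jackson.databind.node.JsonNodeFactory

    val node = JsonNodeFactory.instance.objectNode()
    DestinationAcceptanceTestUtils.putStringIntoJson("""{"a": 1}""", "parsed", node) // nested object
    DestinationAcceptanceTestUtils.putStringIntoJson("plain text", "raw", node) // plain string
    // node now holds {"parsed":{"a":1},"raw":"plain text"}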
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt index 6bc55d0b7a4a..a77b4ae71399 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/JdbcDestinationAcceptanceTest.kt @@ -5,10 +5,10 @@ package io.airbyte.cdk.integrations.standardtest.destination import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.ObjectMapper -import org.jooq.Field -import org.jooq.Record import java.util.* import java.util.function.Function +import org.jooq.Field +import org.jooq.Record abstract class JdbcDestinationAcceptanceTest : DestinationAcceptanceTest() { protected val mapper: ObjectMapper = ObjectMapper() @@ -17,7 +17,10 @@ abstract class JdbcDestinationAcceptanceTest : DestinationAcceptanceTest() { return getJsonFromRecord(record, Function { x: Any? -> Optional.empty() }) } - protected fun getJsonFromRecord(record: Record, valueParser: Function<Any, Optional<String>>): JsonNode { + protected fun getJsonFromRecord( + record: Record, + valueParser: Function<Any, Optional<String>> + ): JsonNode { val node = mapper.createObjectNode() Arrays.stream(record.fields()).forEach { field: Field<*> -> @@ -27,11 +30,18 @@ abstract class JdbcDestinationAcceptanceTest : DestinationAcceptanceTest() { node.put(field.name, parsedValue.get()) } else { when (field.dataType.typeName) { - "varchar", "nvarchar", "jsonb", "json", "other" -> { + "varchar", + "nvarchar", + "jsonb", + "json", + "other" -> { val stringValue = (value?.toString()) - DestinationAcceptanceTestUtils.putStringIntoJson(stringValue, field.name, node) + DestinationAcceptanceTestUtils.putStringIntoJson( + stringValue, + field.name, + node + ) } - else -> node.put(field.name, (value?.toString())) } }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt index 987f8c328b8f..9ad2d55c33cb 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/LocalAirbyteDestination.kt @@ -22,11 +22,19 @@ class LocalAirbyteDestination(private val dest: Destination) : AirbyteDestinatio private var isClosed = false @Throws(Exception::class) - override fun start(destinationConfig: WorkerDestinationConfig, jobRoot: Path, additionalEnvironmentVariables: Map<String, String>) { + override fun start( + destinationConfig: WorkerDestinationConfig, + jobRoot: Path, + additionalEnvironmentVariables: Map<String, String> + ) { consumer = - dest.getConsumer(destinationConfig.destinationConnectionConfiguration, - Jsons.`object`(Jsons.jsonNode(destinationConfig.catalog), ConfiguredAirbyteCatalog::class.java) - ) { obj: AirbyteMessage? -> Destination.defaultOutputRecordCollector(obj) } + dest.getConsumer( + destinationConfig.destinationConnectionConfiguration, + Jsons.`object`( + Jsons.jsonNode(destinationConfig.catalog), + ConfiguredAirbyteCatalog::class.java + ) + ) { message: AirbyteMessage -> Destination.defaultOutputRecordCollector(message) } consumer!!.start() }
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt index 7d5a2e25df1a..8eec831460ba 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/PerStreamStateMessageTest.kt @@ -9,9 +9,9 @@ import io.airbyte.protocol.models.v0.AirbyteMessage import io.airbyte.protocol.models.v0.AirbyteStateMessage import io.airbyte.protocol.models.v0.AirbyteStreamState import io.airbyte.protocol.models.v0.StreamDescriptor +import java.util.function.Consumer import org.junit.jupiter.api.Test import org.mockito.Mockito -import java.util.function.Consumer abstract class PerStreamStateMessageTest { protected abstract val mockedConsumer: Consumer<AirbyteMessage> @@ -23,9 +23,12 @@ abstract class PerStreamStateMessageTest { @Test @Throws(Exception::class) fun ensureAllStateMessageAreEmitted() { - val airbyteMessage1 = AirbyteMessageCreator.createStreamStateMessage("name_one", "state_one") - val airbyteMessage2 = AirbyteMessageCreator.createStreamStateMessage("name_two", "state_two") - val airbyteMessage3 = AirbyteMessageCreator.createStreamStateMessage("name_three", "state_three") + val airbyteMessage1 = + AirbyteMessageCreator.createStreamStateMessage("name_one", "state_one") + val airbyteMessage2 = + AirbyteMessageCreator.createStreamStateMessage("name_two", "state_two") + val airbyteMessage3 = + AirbyteMessageCreator.createStreamStateMessage("name_three", "state_three") val messageConsumer = messageConsumer messageConsumer.accept(airbyteMessage1) @@ -43,16 +46,16 @@ abstract class PerStreamStateMessageTest { internal object AirbyteMessageCreator { fun createStreamStateMessage(name: String?, value: String): AirbyteMessage { return AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState( - AirbyteStateMessage() - .withType(AirbyteStateMessage.AirbyteStateType.STREAM) - .withStream( - AirbyteStreamState() - .withStreamDescriptor( - StreamDescriptor() - .withName(name)) - .withStreamState(Jsons.jsonNode(value)))) + .withType(AirbyteMessage.Type.STATE) + .withState( + AirbyteStateMessage() + .withType(AirbyteStateMessage.AirbyteStateType.STREAM) + .withStream( + AirbyteStreamState() + .withStreamDescriptor(StreamDescriptor().withName(name)) + .withStreamState(Jsons.jsonNode(value)) + ) + ) } } }
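PerStreamStateMessageTest drives STREAM-scoped state messages through the connector's message consumer and then checks that each one reached the mocked output collector. Condensed, the pattern it relies on looks like this (a sketch; mockedConsumer and messageConsumer come from the concrete subclass):

    val state = AirbyteMessageCreator.createStreamStateMessage("name_one", "state_one")
    messageConsumer.accept(state) // the connector consumes the state message...
    Mockito.verify(mockedConsumer).accept(state) // ...and must forward it to the collector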
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt index 73ea61071772..405a5702deb4 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/TestingNamespaces.kt @@ -3,7 +3,6 @@ */ package io.airbyte.cdk.integrations.standardtest.destination -import org.apache.commons.lang3.RandomStringUtils import java.time.Instant import java.time.LocalDate import java.time.ZoneId @@ -12,6 +11,7 @@ import java.time.format.DateTimeFormatter import java.time.format.DateTimeParseException import java.time.temporal.ChronoUnit import java.util.* +import org.apache.commons.lang3.RandomStringUtils /** * This class is used to generate unique namespaces for tests that follow a convention so that we @@ -19,9 +19,9 @@ import java.util.* * but there are exception cases that can prevent that from happening. We want to be able to * identify namespaces for which this has happened from their name, so we can take action. * - * The convention we follow is `_test_YYYYMMDD_<8-character random suffix>`. - */ + * The convention we follow is `_test_YYYYMMDD_<8-character random suffix>`. + * + */ object TestingNamespaces { private val FORMATTER: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd") private const val SUFFIX_LENGTH = 5 @@ -41,7 +41,11 @@ @JvmOverloads fun generate(prefix: String? = null): String { val userDefinedPrefix = if (prefix != null) prefix + "_" else "" - return userDefinedPrefix + STANDARD_PREFIX + FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC"))) + "_" + generateSuffix() + return userDefinedPrefix + + STANDARD_PREFIX + + FORMATTER.format(Instant.now().atZone(ZoneId.of("UTC"))) + + "_" + + generateSuffix() } fun generateFromOriginal(toOverwrite: String?, oldPrefix: String?, newPrefix: String?): String { @@ -60,8 +64,10 @@ private fun isOlderThan(namespace: String, timeMagnitude: Int, timeUnit: ChronoUnit): Boolean { return ifTestNamespaceGetDate(namespace) - .map { namespaceInstant: Instant -> namespaceInstant.isBefore(Instant.now().minus(timeMagnitude.toLong(), timeUnit)) } - .orElse(false) + .map { namespaceInstant: Instant -> + namespaceInstant.isBefore(Instant.now().minus(timeMagnitude.toLong(), timeUnit)) + } + .orElse(false) } private fun ifTestNamespaceGetDate(namespace: String): Optional<Instant> { @@ -81,7 +87,9 @@ private fun parseDateOrEmpty(dateCandidate: String): Optional<Instant> { return try { - Optional.ofNullable(LocalDate.parse(dateCandidate, FORMATTER).atStartOfDay().toInstant(ZoneOffset.UTC)) + Optional.ofNullable( + LocalDate.parse(dateCandidate, FORMATTER).atStartOfDay().toInstant(ZoneOffset.UTC) + ) } catch (e: DateTimeParseException) { Optional.empty() }
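For orientation, generate() composes the optional prefix, the standard test marker, a UTC yyyyMMdd date, and a random suffix, while generateFromOriginal() swaps one prefix for another and keeps the rest. Illustrative output only; the date and suffix vary per run, and the exact STANDARD_PREFIX value sits outside this hunk:

    val ns = TestingNamespaces.generate("mytest")
    // e.g. "mytest_test_20240521_ab3de"
    val renamed = TestingNamespaces.generateFromOriginal(ns, "mytest", "other")
    // e.g. "other_test_20240521_ab3de"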
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt index 7315697b4a8c..80214381a591 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataArgumentsProvider.kt @@ -5,10 +5,10 @@ package io.airbyte.cdk.integrations.standardtest.destination.argproviders import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import java.util.stream.Stream import org.junit.jupiter.api.extension.ExtensionContext import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.ArgumentsProvider -import java.util.stream.Stream /** * Class encapsulating all arguments required for Standard Destination Tests. @@ -20,25 +20,34 @@ class DataArgumentsProvider : ArgumentsProvider { override fun provideArguments(context: ExtensionContext): Stream<out Arguments> { val protocolVersion = ArgumentProviderUtil.getProtocolVersion(context) return Stream.of( - Arguments.of(EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion), EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion)), - Arguments.of(EDGE_CASE_CONFIG.getMessageFileVersion(protocolVersion), EDGE_CASE_CONFIG.getCatalogFileVersion(protocolVersion)) // todo - need to use the new protocol to capture this. - // Arguments.of("stripe_messages.txt", "stripe_schema.json") - ) + Arguments.of( + EXCHANGE_RATE_CONFIG.getMessageFileVersion(protocolVersion), + EXCHANGE_RATE_CONFIG.getCatalogFileVersion(protocolVersion) + ), + Arguments.of( + EDGE_CASE_CONFIG.getMessageFileVersion(protocolVersion), + EDGE_CASE_CONFIG.getCatalogFileVersion(protocolVersion) + ) // todo - need to use the new protocol to capture this. + // Arguments.of("stripe_messages.txt", "stripe_schema.json") + ) } open class CatalogMessageTestConfigPair(val catalogFile: String, val messageFile: String) { - fun getCatalogFileVersion(protocolVersion: ProtocolVersion?): String? { + fun getCatalogFileVersion(protocolVersion: ProtocolVersion): String? { return ArgumentProviderUtil.prefixFileNameByVersion(catalogFile, protocolVersion) } - fun getMessageFileVersion(protocolVersion: ProtocolVersion?): String?
{ + fun getMessageFileVersion(protocolVersion: ProtocolVersion): String { return ArgumentProviderUtil.prefixFileNameByVersion(messageFile, protocolVersion) } } companion object { - val EXCHANGE_RATE_CONFIG: CatalogMessageTestConfigPair = CatalogMessageTestConfigPair("exchange_rate_catalog.json", "exchange_rate_messages.txt") - val EDGE_CASE_CONFIG: CatalogMessageTestConfigPair = CatalogMessageTestConfigPair("edge_case_catalog.json", "edge_case_messages.txt") - val NAMESPACE_CONFIG: CatalogMessageTestConfigPair = CatalogMessageTestConfigPair("namespace_catalog.json", "namespace_messages.txt") + val EXCHANGE_RATE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("exchange_rate_catalog.json", "exchange_rate_messages.txt") + val EDGE_CASE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("edge_case_catalog.json", "edge_case_messages.txt") + val NAMESPACE_CONFIG: CatalogMessageTestConfigPair = + CatalogMessageTestConfigPair("namespace_catalog.json", "namespace_messages.txt") } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt index 95703c82dbfa..ce2e18ab7807 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/DataTypeTestArgumentProvider.kt @@ -5,56 +5,81 @@ package io.airbyte.cdk.integrations.standardtest.destination.argproviders import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import java.util.stream.Stream import org.junit.jupiter.api.extension.ExtensionContext import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.ArgumentsProvider import org.slf4j.Logger import org.slf4j.LoggerFactory -import java.util.stream.Stream class DataTypeTestArgumentProvider : ArgumentsProvider { - private var protocolVersion: ProtocolVersion? 
= null + private lateinit var protocolVersion: ProtocolVersion @Throws(Exception::class) override fun provideArguments(context: ExtensionContext): Stream { protocolVersion = ArgumentProviderUtil.getProtocolVersion(context) return Stream.of( - getArguments(BASIC_TEST), - getArguments(ARRAY_TEST), - getArguments(OBJECT_TEST), - getArguments(OBJECT_WITH_ARRAY_TEST)) + getArguments(BASIC_TEST), + getArguments(ARRAY_TEST), + getArguments(OBJECT_TEST), + getArguments(OBJECT_WITH_ARRAY_TEST) + ) } private fun getArguments(testConfig: CatalogMessageTestConfigWithCompatibility): Arguments { - return Arguments.of(testConfig.getMessageFileVersion(protocolVersion), testConfig.getCatalogFileVersion(protocolVersion), - testConfig.testCompatibility) + return Arguments.of( + testConfig.getMessageFileVersion(protocolVersion), + testConfig.getCatalogFileVersion(protocolVersion), + testConfig.testCompatibility + ) } @JvmRecord - data class TestCompatibility(val requireBasicCompatibility: Boolean, - val requireArrayCompatibility: Boolean, - val requireObjectCompatibility: Boolean) { - fun isTestCompatible(supportBasicDataTypeTest: Boolean, supportArrayDataTypeTest: Boolean, supportObjectDataTypeTest: Boolean): Boolean { + data class TestCompatibility( + val requireBasicCompatibility: Boolean, + val requireArrayCompatibility: Boolean, + val requireObjectCompatibility: Boolean + ) { + fun isTestCompatible( + supportBasicDataTypeTest: Boolean, + supportArrayDataTypeTest: Boolean, + supportObjectDataTypeTest: Boolean + ): Boolean { LOGGER.info("---- Data type test compatibility ----") LOGGER.info("| Data type test | Require | Support |") - LOGGER.info("| Basic test | {} | {} |", (if (requireBasicCompatibility) "true " else "false"), - (if (supportBasicDataTypeTest) "true " else "false")) - LOGGER.info("| Array test | {} | {} |", (if (requireArrayCompatibility) "true " else "false"), - (if (supportArrayDataTypeTest) "true " else "false")) - LOGGER.info("| Object test | {} | {} |", (if (requireObjectCompatibility) "true " else "false"), - (if (supportObjectDataTypeTest) "true " else "false")) + LOGGER.info( + "| Basic test | {} | {} |", + (if (requireBasicCompatibility) "true " else "false"), + (if (supportBasicDataTypeTest) "true " else "false") + ) + LOGGER.info( + "| Array test | {} | {} |", + (if (requireArrayCompatibility) "true " else "false"), + (if (supportArrayDataTypeTest) "true " else "false") + ) + LOGGER.info( + "| Object test | {} | {} |", + (if (requireObjectCompatibility) "true " else "false"), + (if (supportObjectDataTypeTest) "true " else "false") + ) LOGGER.info("--------------------------------------") if (requireBasicCompatibility && !supportBasicDataTypeTest) { - LOGGER.warn("The destination doesn't support required Basic data type test. The test is skipped!") + LOGGER.warn( + "The destination doesn't support required Basic data type test. The test is skipped!" + ) return false } if (requireArrayCompatibility && !supportArrayDataTypeTest) { - LOGGER.warn("The destination doesn't support required Array data type test. The test is skipped!") + LOGGER.warn( + "The destination doesn't support required Array data type test. The test is skipped!" + ) return false } if (requireObjectCompatibility && !supportObjectDataTypeTest) { - LOGGER.warn("The destination doesn't support required Object data type test. The test is skipped!") + LOGGER.warn( + "The destination doesn't support required Object data type test. The test is skipped!" 
+ ) return false } @@ -62,22 +87,43 @@ class DataTypeTestArgumentProvider : ArgumentsProvider { } } - class CatalogMessageTestConfigWithCompatibility(catalogFile: String, messageFile: String, val testCompatibility: TestCompatibility) : DataArgumentsProvider.CatalogMessageTestConfigPair(catalogFile, messageFile) + class CatalogMessageTestConfigWithCompatibility( + catalogFile: String, + messageFile: String, + val testCompatibility: TestCompatibility + ) : DataArgumentsProvider.CatalogMessageTestConfigPair(catalogFile, messageFile) companion object { - private val LOGGER: Logger = LoggerFactory.getLogger(DataTypeTestArgumentProvider::class.java) + private val LOGGER: Logger = + LoggerFactory.getLogger(DataTypeTestArgumentProvider::class.java) const val INTEGER_TYPE_CATALOG: String = "data_type_integer_type_test_catalog.json" const val NUMBER_TYPE_CATALOG: String = "data_type_number_type_test_catalog.json" const val NAN_TYPE_MESSAGE: String = "nan_type_test_message.txt" const val INFINITY_TYPE_MESSAGE: String = "nan_type_test_message.txt" - val BASIC_TEST: CatalogMessageTestConfigWithCompatibility = CatalogMessageTestConfigWithCompatibility("data_type_basic_test_catalog.json", "data_type_basic_test_messages.txt", - TestCompatibility(true, false, false)) - val ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = CatalogMessageTestConfigWithCompatibility("data_type_array_test_catalog.json", "data_type_array_test_messages.txt", - TestCompatibility(true, true, false)) - val OBJECT_TEST: CatalogMessageTestConfigWithCompatibility = CatalogMessageTestConfigWithCompatibility("data_type_object_test_catalog.json", "data_type_object_test_messages.txt", - TestCompatibility(true, false, true)) - val OBJECT_WITH_ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = CatalogMessageTestConfigWithCompatibility("data_type_array_object_test_catalog.json", "data_type_array_object_test_messages.txt", - TestCompatibility(true, true, true)) + val BASIC_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_basic_test_catalog.json", + "data_type_basic_test_messages.txt", + TestCompatibility(true, false, false) + ) + val ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_array_test_catalog.json", + "data_type_array_test_messages.txt", + TestCompatibility(true, true, false) + ) + val OBJECT_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_object_test_catalog.json", + "data_type_object_test_messages.txt", + TestCompatibility(true, false, true) + ) + val OBJECT_WITH_ARRAY_TEST: CatalogMessageTestConfigWithCompatibility = + CatalogMessageTestConfigWithCompatibility( + "data_type_array_object_test_catalog.json", + "data_type_array_object_test_messages.txt", + TestCompatibility(true, true, true) + ) } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt index 9db6a872458d..1d9bdb35c52a 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt +++ 
b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/NumberDataTypeTestArgumentProvider.kt @@ -5,30 +5,36 @@ package io.airbyte.cdk.integrations.standardtest.destination.argproviders import io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion import io.airbyte.cdk.integrations.standardtest.destination.argproviders.util.ArgumentProviderUtil +import java.util.stream.Stream import org.junit.jupiter.api.extension.ExtensionContext import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.ArgumentsProvider -import java.util.stream.Stream class NumberDataTypeTestArgumentProvider : ArgumentsProvider { - private var protocolVersion: ProtocolVersion? = null + private lateinit var protocolVersion: ProtocolVersion @Throws(Exception::class) override fun provideArguments(context: ExtensionContext): Stream { protocolVersion = ArgumentProviderUtil.getProtocolVersion(context) return Stream.of( - getArguments(NUMBER_DATA_TYPE_TEST_CATALOG, NUMBER_DATA_TYPE_TEST_MESSAGES), - getArguments(NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG, NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES)) + getArguments(NUMBER_DATA_TYPE_TEST_CATALOG, NUMBER_DATA_TYPE_TEST_MESSAGES), + getArguments(NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG, NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES) + ) } private fun getArguments(catalogFile: String, messageFile: String): Arguments { - return Arguments.of(ArgumentProviderUtil.prefixFileNameByVersion(catalogFile, protocolVersion), ArgumentProviderUtil.prefixFileNameByVersion(messageFile, protocolVersion)) + return Arguments.of( + ArgumentProviderUtil.prefixFileNameByVersion(catalogFile, protocolVersion), + ArgumentProviderUtil.prefixFileNameByVersion(messageFile, protocolVersion) + ) } companion object { const val NUMBER_DATA_TYPE_TEST_CATALOG: String = "number_data_type_test_catalog.json" const val NUMBER_DATA_TYPE_TEST_MESSAGES: String = "number_data_type_test_messages.txt" - const val NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG: String = "number_data_type_array_test_catalog.json" - const val NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES: String = "number_data_type_array_test_messages.txt" + const val NUMBER_DATA_TYPE_ARRAY_TEST_CATALOG: String = + "number_data_type_array_test_catalog.json" + const val NUMBER_DATA_TYPE_ARRAY_TEST_MESSAGES: String = + "number_data_type_array_test_messages.txt" } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt index b9babae0f2a3..d611e09dae7a 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/argproviders/util/ArgumentProviderUtil.kt @@ -11,27 +11,24 @@ object ArgumentProviderUtil { /** * This method use - * [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.getPrefix] to - * prefix the file name. - * + * [io.airbyte.cdk.integrations.standardtest.destination.ProtocolVersion.getPrefix] to prefix + * the file name. 
* * example: * - * * filename.json -> v0/filename.json * * @param fileName the original file name * @param protocolVersion supported protocol version * @return filename with protocol version prefix */ - fun prefixFileNameByVersion(fileName: String?, protocolVersion: ProtocolVersion?): String { - return String.format("%s/%s", protocolVersion.getPrefix(), fileName) + fun prefixFileNameByVersion(fileName: String?, protocolVersion: ProtocolVersion): String { + return String.format("%s/%s", protocolVersion.prefix, fileName) } /** * This method use reflection to get protocol version method from provided test context. * - * * NOTE: getProtocolVersion method should be public. * * @param context the context in which the current test is being executed. diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt index 4961d019358b..14ed4337b457 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/AdvancedTestDataComparator.kt @@ -4,13 +4,13 @@ package io.airbyte.cdk.integrations.standardtest.destination.comparator import com.fasterxml.jackson.databind.JsonNode -import org.junit.jupiter.api.Assertions -import org.slf4j.Logger -import org.slf4j.LoggerFactory import java.time.ZoneOffset import java.time.ZonedDateTime import java.time.format.DateTimeFormatter import java.time.format.DateTimeParseException +import org.junit.jupiter.api.Assertions +import org.slf4j.Logger +import org.slf4j.LoggerFactory open class AdvancedTestDataComparator : TestDataComparator { override fun assertSameData(expected: List, actual: List) { @@ -37,7 +37,11 @@ open class AdvancedTestDataComparator : TestDataComparator { val expectedEntry = expectedDataIterator.next() val expectedValue = expectedEntry.value val key = expectedEntry.key - val actualValue = ComparatorUtils.getActualValueByExpectedKey(key, actualObject) { identifier: String? -> this.resolveIdentifier(identifier) } + val actualValue = + ComparatorUtils.getActualValueByExpectedKey(key, actualObject) { + identifier: String? -> + this.resolveIdentifier(identifier) + } LOGGER.info("For {} Expected {} vs Actual {}", key, expectedValue, actualValue) assertSameValue(expectedValue, actualValue) } @@ -47,7 +51,8 @@ open class AdvancedTestDataComparator : TestDataComparator { } private fun isJsonNodeEmpty(jsonNode: JsonNode): Boolean { - return jsonNode.isEmpty || (jsonNode.size() == 1 && jsonNode.iterator().next().asText().isEmpty()) + return jsonNode.isEmpty || + (jsonNode.size() == 1 && jsonNode.iterator().next().asText().isEmpty()) } private fun areBothEmpty(expectedData: JsonNode, actualData: JsonNode): Boolean { @@ -58,7 +63,10 @@ open class AdvancedTestDataComparator : TestDataComparator { protected fun assertSameValue(expectedValue: JsonNode, actualValue: JsonNode?) 
{ LOGGER.info("assertSameValue : {} vs {}", expectedValue, actualValue) - Assertions.assertTrue(compareJsonNodes(expectedValue, actualValue), "Expected value $expectedValue vs Actual value $actualValue") + Assertions.assertTrue( + compareJsonNodes(expectedValue, actualValue), + "Expected value $expectedValue vs Actual value $actualValue" + ) } protected fun compareJsonNodes(expectedValue: JsonNode?, actualValue: JsonNode?): Boolean { @@ -111,7 +119,13 @@ open class AdvancedTestDataComparator : TestDataComparator { return false } else { for (expectedNode in expectedList) { - val sameActualNode = actualList.stream().filter { actualNode: JsonNode? -> compareJsonNodes(expectedNode, actualNode) }.findFirst() + val sameActualNode = + actualList + .stream() + .filter { actualNode: JsonNode? -> + compareJsonNodes(expectedNode, actualNode) + } + .findFirst() if (sameActualNode.isPresent) { actualList.remove(sameActualNode.get()) } else { @@ -122,11 +136,17 @@ open class AdvancedTestDataComparator : TestDataComparator { } } - protected fun compareBooleanValues(firstBooleanValue: String, secondBooleanValue: String): Boolean { + protected fun compareBooleanValues( + firstBooleanValue: String, + secondBooleanValue: String + ): Boolean { return firstBooleanValue.toBoolean() == secondBooleanValue.toBoolean() } - protected fun compareNumericValues(firstNumericValue: String, secondNumericValue: String): Boolean { + protected fun compareNumericValues( + firstNumericValue: String, + secondNumericValue: String + ): Boolean { val firstValue = firstNumericValue.toDouble() val secondValue = secondNumericValue.toDouble() @@ -141,27 +161,44 @@ open class AdvancedTestDataComparator : TestDataComparator { protected fun isDateTimeWithTzValue(value: String): Boolean { return !TEST_DATASET_IGNORE_LIST.contains(value) && - value.matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$".toRegex()) + value.matches( + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$".toRegex() + ) } - protected open fun parseDestinationDateWithTz(destinationValue: String?): ZonedDateTime { - return ZonedDateTime.parse(destinationValue, DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT)).withZoneSameInstant(ZoneOffset.UTC) + protected open fun parseDestinationDateWithTz(destinationValue: String): ZonedDateTime { + return ZonedDateTime.parse( + destinationValue, + DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_WITH_TZ_FORMAT) + ) + .withZoneSameInstant(ZoneOffset.UTC) } - protected fun compareDateTimeWithTzValues(airbyteMessageValue: String, destinationValue: String): Boolean { + protected fun compareDateTimeWithTzValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { try { - val airbyteDate = ZonedDateTime.parse(airbyteMessageValue, airbyteDateTimeWithTzFormatter).withZoneSameInstant(ZoneOffset.UTC) + val airbyteDate = + ZonedDateTime.parse(airbyteMessageValue, airbyteDateTimeWithTzFormatter) + .withZoneSameInstant(ZoneOffset.UTC) val destinationDate = parseDestinationDateWithTz(destinationValue) return airbyteDate == destinationDate } catch (e: DateTimeParseException) { - LOGGER.warn("Fail to convert values to ZonedDateTime. Try to compare as text. Airbyte value({}), Destination value ({}). Exception: {}", - airbyteMessageValue, destinationValue, e) + LOGGER.warn( + "Fail to convert values to ZonedDateTime. Try to compare as text. Airbyte value({}), Destination value ({}). 
Exception: {}", + airbyteMessageValue, + destinationValue, + e + ) return compareTextValues(airbyteMessageValue, destinationValue) } } protected fun isDateTimeValue(value: String): Boolean { - return value.matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$".toRegex()) + return value.matches( + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$".toRegex() + ) } protected fun isTimeWithTimezone(value: String): Boolean { @@ -172,7 +209,10 @@ open class AdvancedTestDataComparator : TestDataComparator { return value.matches("^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$".toRegex()) } - protected open fun compareDateTimeValues(airbyteMessageValue: String, destinationValue: String): Boolean { + protected open fun compareDateTimeValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { return compareTextValues(airbyteMessageValue, destinationValue) } @@ -180,15 +220,24 @@ open class AdvancedTestDataComparator : TestDataComparator { return value.matches("^\\d{4}-\\d{2}-\\d{2}( BC)?$".toRegex()) } - protected open fun compareDateValues(airbyteMessageValue: String, destinationValue: String): Boolean { + protected open fun compareDateValues( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { return compareTextValues(airbyteMessageValue, destinationValue) } - protected open fun compareTimeWithoutTimeZone(airbyteMessageValue: String, destinationValue: String): Boolean { + protected open fun compareTimeWithoutTimeZone( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { return compareTextValues(airbyteMessageValue, destinationValue) } - protected fun compareTimeWithTimeZone(airbyteMessageValue: String, destinationValue: String): Boolean { + protected fun compareTimeWithTimeZone( + airbyteMessageValue: String, + destinationValue: String + ): Boolean { return compareTextValues(airbyteMessageValue, destinationValue) } @@ -203,18 +252,21 @@ open class AdvancedTestDataComparator : TestDataComparator { const val AIRBYTE_DATETIME_FORMAT: String = "yyyy-MM-dd'T'HH:mm:ss" const val AIRBYTE_DATETIME_PARSED_FORMAT: String = "yyyy-MM-dd HH:mm:ss.S" const val AIRBYTE_DATETIME_PARSED_FORMAT_TZ: String = "yyyy-MM-dd HH:mm:ss XXX" - const val AIRBYTE_DATETIME_WITH_TZ_FORMAT: String = ("[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d]" - + "[[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X][' '][G]]]") + const val AIRBYTE_DATETIME_WITH_TZ_FORMAT: String = + ("[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d]" + + "[[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X][' '][G]]]") // TODO revisit dataset which used date as string: exchange_rate_catalog.json // tried to change it to date time type but some connectors failed to store it e.i. 
// bigquery-denormalized - private val TEST_DATASET_IGNORE_LIST = setOf( + private val TEST_DATASET_IGNORE_LIST = + setOf( "2020-08-29T00:00:00Z", "2020-08-30T00:00:00Z", "2020-08-31T00:00:00Z", "2020-09-01T00:00:00Z", "2020-09-15T16:58:52.000000Z", - "2020-03-31T00:00:00Z") + "2020-03-31T00:00:00Z" + ) } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt index d31a49bfe711..e18d2ea54508 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/comparator/BasicTestDataComparator.kt @@ -4,12 +4,13 @@ package io.airbyte.cdk.integrations.standardtest.destination.comparator import com.fasterxml.jackson.databind.JsonNode +import java.util.function.Function import org.junit.jupiter.api.Assertions import org.slf4j.Logger import org.slf4j.LoggerFactory -import java.util.function.Function -class BasicTestDataComparator(private val nameResolver: Function>) : TestDataComparator { +class BasicTestDataComparator(private val nameResolver: Function>) : + TestDataComparator { override fun assertSameData(expected: List, actual: List) { LOGGER.info("Expected data {}", expected) LOGGER.info("Actual data {}", actual) @@ -27,7 +28,8 @@ class BasicTestDataComparator(private val nameResolver: Function>): JsonNode? { + fun getActualValueByExpectedKey( + expectedKey: String?, + actualJsonNode: JsonNode, + nameResolver: Function> + ): JsonNode? 
{ for (actualKey in nameResolver.apply(expectedKey)) { if (actualJsonNode.has(actualKey)) { return actualJsonNode[actualKey] diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt index ef1bdbee7081..a79b1b246d70 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.kt @@ -6,20 +6,28 @@ package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping import com.fasterxml.jackson.databind.JsonNode import io.airbyte.cdk.db.jdbc.JdbcDatabase import io.airbyte.cdk.integrations.base.JavaBaseConstants +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_ID +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_RAW_ID +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA +import io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT +import io.airbyte.cdk.integrations.base.JavaBaseConstants.LEGACY_RAW_TABLE_COLUMNS import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType import io.airbyte.integrations.base.destination.typing_deduping.BaseSqlGeneratorIntegrationTest import io.airbyte.integrations.base.destination.typing_deduping.StreamId import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState +import java.sql.SQLException +import java.util.* import org.jooq.* import org.jooq.conf.ParamType import org.jooq.impl.DSL import org.jooq.impl.SQLDataType -import java.sql.SQLException -import java.util.* -abstract class JdbcSqlGeneratorIntegrationTest - : BaseSqlGeneratorIntegrationTest() { +abstract class JdbcSqlGeneratorIntegrationTest : + BaseSqlGeneratorIntegrationTest() { protected abstract val database: JdbcDatabase get @@ -40,37 +48,51 @@ abstract class JdbcSqlGeneratorIntegrationTest? @Throws(SQLException::class) - private fun insertRecords(tableName: Name, columnNames: List, records: List?, vararg columnsToParseJson: String) { - var insert = dslContext.insertInto( + private fun insertRecords( + tableName: Name, + columnNames: List, + records: List?, + vararg columnsToParseJson: String + ) { + var insert = + dslContext.insertInto( DSL.table(tableName), - columnNames.stream().map { columnName: String? -> DSL.field(DSL.quotedName(columnName)) }.toList()) + columnNames + .stream() + .map { columnName: String? -> DSL.field(DSL.quotedName(columnName)) } + .toList() + ) for (record in records!!) { - insert = insert.values( - columnNames.stream() - .map { fieldName: String -> - // Convert this field to a string. Pretty naive implementation. 
- val column = record[fieldName] - val columnAsString = if (column == null) { + insert = + insert.values( + columnNames + .stream() + .map { fieldName: String -> + // Convert this field to a string. Pretty naive implementation. + val column = record[fieldName] + val columnAsString = + if (column == null) { null } else if (column.isTextual) { column.asText() } else { column.toString() } - if (Arrays.asList(*columnsToParseJson).contains(fieldName)) { - return@map toJsonValue(columnAsString) - } else { - return@map DSL.`val`(columnAsString) - } + if (Arrays.asList(*columnsToParseJson).contains(fieldName)) { + return@map toJsonValue(columnAsString) + } else { + return@map DSL.`val`(columnAsString) } - .toList()) + } + .toList() + ) } database.execute(insert.getSQL(ParamType.INLINED)) } @@ -82,66 +104,87 @@ abstract class JdbcSqlGeneratorIntegrationTest?) { insertRecords( - DSL.name(streamId!!.rawNamespace, streamId.rawName), - JavaBaseConstants.V2_RAW_TABLE_COLUMN_NAMES, - records, - COLUMN_NAME_DATA, - COLUMN_NAME_AB_META) + DSL.name(streamId!!.rawNamespace, streamId.rawName), + JavaBaseConstants.V2_RAW_TABLE_COLUMN_NAMES, + records, + COLUMN_NAME_DATA, + COLUMN_NAME_AB_META + ) } @Throws(Exception::class) override fun insertV1RawTableRecords(streamId: StreamId?, records: List?) { insertRecords( - DSL.name(streamId!!.rawNamespace, streamId.rawName), - LEGACY_RAW_TABLE_COLUMNS, - records, - COLUMN_NAME_DATA) + DSL.name(streamId!!.rawNamespace, streamId.rawName), + LEGACY_RAW_TABLE_COLUMNS, + records, + COLUMN_NAME_DATA + ) } @Throws(Exception::class) - override fun insertFinalTableRecords(includeCdcDeletedAt: Boolean, - streamId: StreamId?, - suffix: String?, - records: List?) { + override fun insertFinalTableRecords( + includeCdcDeletedAt: Boolean, + streamId: StreamId?, + suffix: String?, + records: List? 
+ ) { val columnNames = - if (includeCdcDeletedAt) FINAL_TABLE_COLUMN_NAMES_CDC else FINAL_TABLE_COLUMN_NAMES + if (includeCdcDeletedAt) FINAL_TABLE_COLUMN_NAMES_CDC else FINAL_TABLE_COLUMN_NAMES insertRecords( - DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix), - columnNames, - records, - COLUMN_NAME_AB_META, "struct", "array", "unknown") + DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix), + columnNames, + records, + COLUMN_NAME_AB_META, + "struct", + "array", + "unknown" + ) } @Throws(Exception::class) override fun dumpRawTableRecords(streamId: StreamId?): List { - return database.queryJsons(dslContext.selectFrom(DSL.name(streamId!!.rawNamespace, streamId.rawName)).getSQL(ParamType.INLINED)) + return database.queryJsons( + dslContext + .selectFrom(DSL.name(streamId!!.rawNamespace, streamId.rawName)) + .getSQL(ParamType.INLINED) + ) } @Throws(Exception::class) override fun dumpFinalTableRecords(streamId: StreamId?, suffix: String?): List { - return database - .queryJsons(dslContext.selectFrom(DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix)).getSQL(ParamType.INLINED)) + return database.queryJsons( + dslContext + .selectFrom(DSL.name(streamId!!.finalNamespace, streamId.finalName + suffix)) + .getSQL(ParamType.INLINED) + ) } @Throws(Exception::class) diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt index a3e84f73dcd7..df73b7dac745 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.kt @@ -13,14 +13,14 @@ import io.airbyte.cdk.db.jdbc.JdbcUtils import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest import io.airbyte.integrations.base.destination.typing_deduping.StreamId.Companion.concatenateRawTableName -import org.jooq.impl.DSL import javax.sql.DataSource +import org.jooq.impl.DSL /** * This class is largely the same as - * [io.airbyte.integrations.destination.snowflake.typing_deduping.AbstractSnowflakeTypingDedupingTest]. - * But (a) it uses jooq to construct the sql statements, and (b) it doesn't need to upcase anything. - * At some point we might (?) want to do a refactor to combine them. + * [io.airbyte.integrations.destination.snowflake.typing_deduping.AbstractSnowflakeTypingDedupingTest] + * . But (a) it uses jooq to construct the sql statements, and (b) it doesn't need to upcase + * anything. At some point we might (?) want to do a refactor to combine them. */ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() { private var database: JdbcDatabase? = null @@ -28,8 +28,8 @@ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() { protected abstract val baseConfig: ObjectNode /** - * Get the config as declared in GSM (or directly from the testcontainer). This class will do - * further modification to the config to ensure test isolation.i + * Get the config as declared in GSM (or directly from the testcontainer). 
+ * do further modification to the config to ensure test isolation.
 */
get
@@ -37,29 +37,30 @@ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() {
protected val sourceOperations: JdbcCompatibleSourceOperations<*>
/**
- * Subclasses may need to return a custom source operations if the default one does not handle
- * vendor-specific types correctly. For example, you most likely need to override this method to
- * deserialize JSON columns to JsonNode.
+ * Subclasses may need to return custom source operations if the default one does not
+ * handle vendor-specific types correctly. For example, you most likely need to override
+ * this method to deserialize JSON columns to JsonNode.
 */
get() = JdbcUtils.defaultSourceOperations
protected val rawSchema: String
/**
- * Subclasses using a config with a nonstandard raw table schema should override this method.
+ * Subclasses using a config with a nonstandard raw table schema should override this
+ * method.
 */
get() = JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE
/**
- * Subclasses using a config where the default schema is not in the `schema` key should
- * override this method and [.setDefaultSchema].
+ * Subclasses using a config where the default schema is not in the `schema` key should override
+ * this method and [.setDefaultSchema].
 */
protected fun getDefaultSchema(config: JsonNode): String {
return config["schema"].asText()
}
/**
- * Subclasses using a config where the default schema is not in the `schema` key should
- * override this method and [.getDefaultSchema].
+ * Subclasses using a config where the default schema is not in the `schema` key should override
+ * this method and [.getDefaultSchema].
 */
protected fun setDefaultSchema(config: JsonNode, schema: String?) {
(config as ObjectNode).put("schema", schema)
@@ -74,7 +75,10 @@ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() {
}
@Throws(Exception::class)
- override fun dumpRawTableRecords(streamNamespace: String?, streamName: String?): List<JsonNode> {
+ override fun dumpRawTableRecords(
+ streamNamespace: String?,
+ streamName: String?
+ ): List<JsonNode> {
var streamNamespace = streamNamespace
if (streamNamespace == null) {
streamNamespace = getDefaultSchema(config!!)
@@ -85,7 +89,10 @@ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() {
}
@Throws(Exception::class)
- override fun dumpFinalTableRecords(streamNamespace: String?, streamName: String?): List<JsonNode> {
+ override fun dumpFinalTableRecords(
+ streamNamespace: String?,
+ streamName: String?
+ ): List<JsonNode> {
var streamNamespace = streamNamespace
if (streamNamespace == null) {
streamNamespace = getDefaultSchema(config!!)
@@ -99,7 +106,12 @@ abstract class JdbcTypingDedupingTest : BaseTypingDedupingTest() {
if (streamNamespace == null) {
streamNamespace = getDefaultSchema(config!!)
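The `getDefaultSchema`/`setDefaultSchema` pair documented above is the extension point for destinations that keep their default schema under a different config key. A hedged sketch of such an override follows; the `vendor_schema` key and both class names are invented, and the real methods are `protected` on the test base class.

```kotlin
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.node.ObjectNode

// Invented stand-in for the test base class.
open class TypingDedupingTestSketch {
    open fun getDefaultSchema(config: JsonNode): String = config["schema"].asText()
    open fun setDefaultSchema(config: JsonNode, schema: String?) {
        (config as ObjectNode).put("schema", schema)
    }
}

// A destination whose config keeps the default schema under "vendor_schema".
class VendorTypingDedupingTestSketch : TypingDedupingTestSketch() {
    override fun getDefaultSchema(config: JsonNode): String =
        config["vendor_schema"].asText()
    override fun setDefaultSchema(config: JsonNode, schema: String?) {
        (config as ObjectNode).put("vendor_schema", schema)
    }
}

fun main() {
    val config = ObjectMapper().createObjectNode()
    val test = VendorTypingDedupingTestSketch()
    test.setDefaultSchema(config, "analytics")
    println(test.getDefaultSchema(config)) // analytics
}
```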
} - database!!.execute(DSL.dropTableIfExists(DSL.name(rawSchema, concatenateRawTableName(streamNamespace, streamName!!))).sql) + database!!.execute( + DSL.dropTableIfExists( + DSL.name(rawSchema, concatenateRawTableName(streamNamespace, streamName!!)) + ) + .sql + ) database!!.execute(DSL.dropSchemaIfExists(DSL.name(streamNamespace)).cascade().sql) } diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt index 249e5ce09a28..b97b46d2a57d 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/gcs/BaseGcsDestination.kt @@ -71,7 +71,7 @@ abstract class BaseGcsDestination : BaseConnector(), Destination { override fun getConsumer( config: JsonNode, configuredCatalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? { val gcsConfig: GcsDestinationConfig = GcsDestinationConfig.Companion.getGcsDestinationConfig(config) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt index b6f23623eff6..c2ebd0eac01a 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.kt @@ -181,7 +181,7 @@ abstract class GcsStreamCopier( } @Throws(Exception::class) - override fun generateMergeStatement(destTableName: String?): String? 
{ + override fun generateMergeStatement(destTableName: String?): String { LOGGER.info( "Preparing to merge tmp table {} to dest table: {}, schema: {}, in destination.", tmpTableName, diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt index aebf1d02bf07..40c4a5b4c48d 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsAvroParquetDestinationAcceptanceTest.kt @@ -27,9 +27,7 @@ import org.junit.jupiter.params.provider.ArgumentsSource abstract class GcsAvroParquetDestinationAcceptanceTest(s3Format: S3Format) : GcsDestinationAcceptanceTest(s3Format) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 @ParameterizedTest @ArgumentsSource(NumberDataTypeTestArgumentProvider::class) @@ -38,8 +36,8 @@ abstract class GcsAvroParquetDestinationAcceptanceTest(s3Format: S3Format) : val catalog = readCatalogFromFile(catalogFileName) val messages = readMessagesFromFile(messagesFileName) - val config = getConfig() - val defaultSchema = getDefaultSchema(config!!) + val config = this.getConfig() + val defaultSchema = getDefaultSchema(config) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt index 0ca10fc74aa9..e10cbe9a46a5 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseAvroDestinationAcceptanceTest.kt @@ -30,18 +30,16 @@ abstract class GcsBaseAvroDestinationAcceptanceTest : }""" ) - override fun getTestDataComparator(): TestDataComparator { - return GcsAvroTestDataComparator() - } + override fun getTestDataComparator(): TestDataComparator = GcsAvroTestDataComparator() @Throws(Exception::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -92,7 +90,5 @@ abstract class GcsBaseAvroDestinationAcceptanceTest : return resultDataTypes } - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 } diff --git 
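The `GcsStreamCopier` hunk above makes the same `String?` to `String` narrowing as the other copiers in this patch. It leans on a Kotlin rule worth spelling out: an override may tighten a nullable return type to its non-null counterpart. A minimal sketch with invented class names:

```kotlin
// Invented hierarchy; illustrates the language rule, not the CDK's classes.
abstract class LegacyStreamCopier {
    abstract fun generateMergeStatement(destTableName: String?): String?
}

class TightenedStreamCopier : LegacyStreamCopier() {
    // Covariant override: String is a subtype of String?, so narrowing the
    // return type compiles, and callers can drop their null handling.
    override fun generateMergeStatement(destTableName: String?): String =
        "INSERT INTO $destTableName SELECT * FROM tmp_$destTableName"
}

fun main() {
    val sql: String = TightenedStreamCopier().generateMergeStatement("users")
    println(sql) // no `!!` or `?.` needed at the call site
}
```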
a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt index 22ac5401041e..8c86a2dc2e73 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvDestinationAcceptanceTest.kt @@ -22,9 +22,7 @@ import org.apache.commons.csv.CSVRecord import org.apache.commons.csv.QuoteMode abstract class GcsBaseCsvDestinationAcceptanceTest : GcsDestinationAcceptanceTest(S3Format.CSV) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? get() = @@ -41,9 +39,9 @@ abstract class GcsBaseCsvDestinationAcceptanceTest : GcsDestinationAcceptanceTes @Throws(IOException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { val objectSummaries = getAllSyncedObjects(streamName, namespace) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt index 1c436609cf33..6b9347abd1f3 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseCsvGzipDestinationAcceptanceTest.kt @@ -16,9 +16,7 @@ import java.util.Map import java.util.zip.GZIPInputStream abstract class GcsBaseCsvGzipDestinationAcceptanceTest : GcsBaseCsvDestinationAcceptanceTest() { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? 
get() = // config without compression defaults to GZIP diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt index eb87cf87a238..b1d8d1d165e5 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlDestinationAcceptanceTest.kt @@ -20,9 +20,7 @@ import kotlin.collections.MutableList abstract class GcsBaseJsonlDestinationAcceptanceTest : GcsDestinationAcceptanceTest(S3Format.JSONL) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? get() = @@ -37,9 +35,9 @@ abstract class GcsBaseJsonlDestinationAcceptanceTest : @Throws(IOException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { val objectSummaries = getAllSyncedObjects(streamName, namespace) diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt index 8c57e6926010..746e37e32617 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseJsonlGzipDestinationAcceptanceTest.kt @@ -15,9 +15,7 @@ import java.util.Map import java.util.zip.GZIPInputStream abstract class GcsBaseJsonlGzipDestinationAcceptanceTest : GcsBaseJsonlDestinationAcceptanceTest() { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? 
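The repeated `getProtocolVersion()` rewrites above are the standard block-body to expression-body conversion; behavior is unchanged and the return type is inferred. A tiny sketch, using stand-in types rather than the CDK's:

```kotlin
enum class ProtocolVersion { V0, V1 }

open class BlockBodyTest {
    // The old shape: a block body with an explicit return.
    open fun getProtocolVersion(): ProtocolVersion {
        return ProtocolVersion.V1
    }
}

class ExpressionBodyTest : BlockBodyTest() {
    // The new shape: same behavior in one line, return type inferred.
    override fun getProtocolVersion() = ProtocolVersion.V1
}
```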
get() = // config without compression defaults to GZIP diff --git a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt index f5849f7e6b81..36aa8f28a489 100644 --- a/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/gcs-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/gcs/GcsBaseParquetDestinationAcceptanceTest.kt @@ -26,26 +26,22 @@ import org.apache.parquet.hadoop.ParquetReader abstract class GcsBaseParquetDestinationAcceptanceTest : GcsAvroParquetDestinationAcceptanceTest(S3Format.PARQUET) { - override fun getProtocolVersion(): ProtocolVersion { - return ProtocolVersion.V1 - } + override fun getProtocolVersion() = ProtocolVersion.V1 override val formatConfig: JsonNode? get() = Jsons.jsonNode(java.util.Map.of("format_type", "Parquet", "compression_codec", "GZIP")) - override fun getTestDataComparator(): TestDataComparator { - return GcsAvroTestDataComparator() - } + override fun getTestDataComparator(): TestDataComparator = GcsAvroTestDataComparator() @Throws(IOException::class, URISyntaxException::class) override fun retrieveRecords( - testEnv: TestDestinationEnv, - streamName: String, - namespace: String, + testEnv: TestDestinationEnv?, + streamName: String?, + namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -87,7 +83,7 @@ abstract class GcsBaseParquetDestinationAcceptanceTest : val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key) val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key)) val path = Path(uri) - val hadoopConfig = getHadoopConfig(config!!) + val hadoopConfig = getHadoopConfig(config) ParquetReader.builder(AvroReadSupport(), path) .withConf(hadoopConfig) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt index 2d71a152df19..d4364f0f4c14 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/BaseS3Destination.kt @@ -58,7 +58,7 @@ protected constructor( override fun getConsumer( config: JsonNode, catalog: ConfiguredAirbyteCatalog, - outputRecordCollector: Consumer? + outputRecordCollector: Consumer ): AirbyteMessageConsumer? 
{ val s3Config = configFactory.getS3DestinationConfig(config, storageProvider()) return S3ConsumerFactory() diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt index 88af39e8611b..c12a5ba57f4d 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.kt @@ -28,7 +28,7 @@ import org.slf4j.LoggerFactory class S3ConsumerFactory { fun create( - outputRecordCollector: Consumer?, + outputRecordCollector: Consumer, storageOperations: BlobStorageOperations, namingResolver: NamingConventionTransformer, onCreateBuffer: BufferCreateFunction, diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt index d876ed445a80..6615a4563968 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt @@ -30,9 +30,9 @@ internal class AsyncFlush( private val stagingOperations: StagingOperations?, private val database: JdbcDatabase?, private val catalog: ConfiguredAirbyteCatalog?, - private val typerDeduperValve: TypeAndDedupeOperationValve?, - private val typerDeduper: - TyperDeduper?, // In general, this size is chosen to improve the performance of lower memory + private val typerDeduperValve: TypeAndDedupeOperationValve, + private val typerDeduper: TyperDeduper, + // In general, this size is chosen to improve the performance of lower memory // connectors. With 1 Gi // of // resource the connector will usually at most fill up around 150 MB in a single queue. 
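The `AsyncFlush.flush` hunk just below restores the two-step staging flow: upload the serialized buffer to a stage, then copy from that stage into the raw table, failing loudly if either step breaks. Here is a simplified sketch of that control flow; the interface is an invented stand-in for `StagingOperations`, not its real signature.

```kotlin
// Invented, simplified staging API; the CDK's signatures differ.
interface StagingOps {
    fun uploadRecordsToStage(buffer: ByteArray, stageName: String, stagingPath: String): String
    fun copyIntoTableFromStage(stageName: String, stagingPath: String, stagedFile: String, table: String)
}

fun flush(ops: StagingOps, buffer: ByteArray, stageName: String, stagingPath: String, table: String) {
    try {
        // Step 1: land the buffer in the stage and keep the staged file name.
        val stagedFile = ops.uploadRecordsToStage(buffer, stageName, stagingPath)
        // Step 2: copy that staged file into the destination's raw table.
        ops.copyIntoTableFromStage(stageName, stagingPath, stagedFile, table)
    } catch (e: Exception) {
        // Mirrors the patch: staging failures surface as a hard error.
        throw RuntimeException("Failed to upload buffer to stage and commit to destination", e)
    }
}
```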
By @@ -46,7 +46,7 @@ internal class AsyncFlush( streamDescToWriteConfig @Throws(Exception::class) - override fun flush(decs: StreamDescriptor, stream: Stream) { + override fun flush(decs: StreamDescriptor, stream: Stream) { val writer: CsvSerializedBuffer try { writer = @@ -94,26 +94,35 @@ internal class AsyncFlush( val schemaName: String = writeConfig.outputSchemaName val stageName = stagingOperations!!.getStageName(schemaName, writeConfig.outputTableName) val stagingPath = - stagingOperations.getStagingPath( - GeneralStagingFunctions.RANDOM_CONNECTION_ID, - schemaName, - writeConfig.streamName, - writeConfig.outputTableName, - writeConfig.writeDatetime) + stagingOperations.getStagingPath( + GeneralStagingFunctions.RANDOM_CONNECTION_ID, + schemaName, + writeConfig.streamName, + writeConfig.outputTableName, + writeConfig.writeDatetime + ) try { val stagedFile = stagingOperations.uploadRecordsToStage( database, - stageName, - stagingPath, - List.of(stagedFile), - writeConfig.outputTableName, + writer, schemaName, - stagingOperations, - writeConfig.namespace, - writeConfig.streamName, - typerDeduperValve, - typerDeduper) + stageName, + stagingPath + ) + GeneralStagingFunctions.copyIntoTableFromStage( + database, + stageName, + stagingPath, + List.of(stagedFile), + writeConfig.outputTableName, + schemaName, + stagingOperations, + writeConfig.namespace, + writeConfig.streamName, + typerDeduperValve, + typerDeduper + ) } catch (e: Exception) { logger.error("Failed to flush and commit buffer data into destination's raw table", e) throw RuntimeException("Failed to upload buffer to stage and commit to destination", e) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt index 7b31c315b53a..09125c12b016 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/StagingConsumerFactory.kt @@ -111,6 +111,10 @@ private constructor( } fun createAsync(): SerializedAirbyteMessageConsumer { + val typerDeduper = this.typerDeduper!! + val typerDeduperValve = this.typerDeduperValve!! + val stagingOperations = this.stagingOperations!! + val writeConfigs: List = createWriteConfigs( namingResolver, @@ -135,7 +139,7 @@ private constructor( return AsyncStreamConsumer( outputRecordCollector!!, GeneralStagingFunctions.onStartFunction( - database, + database!!, stagingOperations, writeConfigs, typerDeduper @@ -170,13 +174,13 @@ private constructor( fun builder( outputRecordCollector: Consumer, database: JdbcDatabase?, - stagingOperations: StagingOperations?, + stagingOperations: StagingOperations, namingResolver: NamingConventionTransformer?, config: JsonNode?, catalog: ConfiguredAirbyteCatalog, purgeStagingData: Boolean, - typerDeduperValve: TypeAndDedupeOperationValve?, - typerDeduper: TyperDeduper?, + typerDeduperValve: TypeAndDedupeOperationValve, + typerDeduper: TyperDeduper, parsedCatalog: ParsedCatalog?, defaultNamespace: String?, useDestinationsV2Columns: Boolean @@ -226,7 +230,15 @@ private constructor( val message = String.format( "You are trying to write multiple streams to the same table. 
Consider switching to a custom namespace format using \${SOURCE_NAMESPACE}, or moving one of them into a separate connection with a different stream prefix. Affected streams: %s", - conflictingStreams.stream().map(Function { config: WriteConfig -> config.namespace + "." + config.streamName }).collect(Collectors.joining(", "))) + conflictingStreams + .stream() + .map( + Function { config: WriteConfig -> + config.namespace + "." + config.streamName + } + ) + .collect(Collectors.joining(", ")) + ) throw ConfigErrorException(message) } return streamDescToWriteConfig diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt index 019550b1664d..3a5c2572dccc 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetDestinationAcceptanceTest.kt @@ -31,7 +31,7 @@ abstract class S3AvroParquetDestinationAcceptanceTest protected constructor(s3Fo val catalog = readCatalogFromFile(catalogFileName) val messages = readMessagesFromFile(messagesFileName) - val config = getConfig() + val config = this.getConfig() val defaultSchema = getDefaultSchema(config!!) val configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog) runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt index f556d92e130c..676e9f98cf17 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3AvroParquetTestDataComparator.kt @@ -8,10 +8,7 @@ import java.time.* import java.time.format.DateTimeFormatter class S3AvroParquetTestDataComparator : AdvancedTestDataComparator() { - override fun compareDateValues( - airbyteMessageValue: String?, - destinationValue: String - ): Boolean { + override fun compareDateValues(airbyteMessageValue: String, destinationValue: String): Boolean { val destinationDate = LocalDate.ofEpochDay(destinationValue.toLong()) val expectedDate = LocalDate.parse( @@ -30,7 +27,7 @@ class S3AvroParquetTestDataComparator : AdvancedTestDataComparator() { } override fun compareDateTimeValues( - airbyteMessageValue: String?, + airbyteMessageValue: String, destinationValue: String ): Boolean { val format = DateTimeFormatter.ofPattern(AdvancedTestDataComparator.AIRBYTE_DATETIME_FORMAT) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt index 483008284132..f346315b7e66 100644 --- 
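For context on the `conflictingStreams` formatting above: the check exists because two configured streams can normalize to the same final table, and the error should name every offender at once. A hedged, self-contained sketch of that detection; `WriteConfig` here is a reduced stand-in for the CDK class.

```kotlin
// Reduced stand-in: only the fields the check needs.
data class WriteConfig(val namespace: String, val streamName: String, val outputTableName: String)

fun requireNoTableCollisions(configs: List<WriteConfig>) {
    val conflicting =
        configs.groupBy { it.outputTableName }.values.filter { it.size > 1 }.flatten()
    if (conflicting.isNotEmpty()) {
        val affected = conflicting.joinToString(", ") { "${it.namespace}.${it.streamName}" }
        // One message naming every offender, like the formatted string above.
        throw IllegalArgumentException(
            "You are trying to write multiple streams to the same table. Affected streams: $affected"
        )
    }
}

fun main() {
    requireNoTableCollisions(
        listOf(
            WriteConfig("public", "Users", "users"),
            WriteConfig("public", "USERS", "users") // collides after normalization
        )
    )
}
```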
a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseAvroDestinationAcceptanceTest.kt @@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.ObjectReader import io.airbyte.cdk.integrations.destination.s3.avro.AvroConstants import io.airbyte.cdk.integrations.destination.s3.util.AvroRecordHelper +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator import io.airbyte.commons.json.Jsons import java.util.* import org.apache.avro.Schema @@ -38,11 +39,12 @@ abstract class S3BaseAvroDestinationAcceptanceTest protected constructor() : @Throws(Exception::class) override fun retrieveRecords( testEnv: TestDestinationEnv?, - streamName: String, + streamName: String?, namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = AvroRecordHelper.getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = + AvroRecordHelper.getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -69,7 +71,7 @@ abstract class S3BaseAvroDestinationAcceptanceTest protected constructor() : return jsonRecords } - override fun getTestDataComparator() = S3AvroParquetTestDataComparator() + override fun getTestDataComparator(): TestDataComparator = S3AvroParquetTestDataComparator() @Throws(Exception::class) override fun retrieveDataTypesFromPersistedFiles( diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt index 7d799c14c69e..381e75113e62 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseJsonlDestinationAcceptanceTest.kt @@ -37,7 +37,7 @@ abstract class S3BaseJsonlDestinationAcceptanceTest protected constructor() : testEnv: TestDestinationEnv?, streamName: String?, namespace: String?, - streamSchema: JsonNode? 
+ streamSchema: JsonNode ): List { val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt index cb746b91d4c3..401e40a15061 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3BaseParquetDestinationAcceptanceTest.kt @@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.ObjectReader import io.airbyte.cdk.integrations.destination.s3.avro.AvroConstants import io.airbyte.cdk.integrations.destination.s3.parquet.S3ParquetWriter import io.airbyte.cdk.integrations.destination.s3.util.AvroRecordHelper +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator import io.airbyte.commons.json.Jsons import java.io.IOException import java.net.URI @@ -28,11 +29,12 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() : @Throws(IOException::class, URISyntaxException::class) override fun retrieveRecords( testEnv: TestDestinationEnv?, - streamName: String, + streamName: String?, namespace: String?, streamSchema: JsonNode ): List { - val nameUpdater = AvroRecordHelper.getFieldNameUpdater(streamName, namespace, streamSchema) + val nameUpdater = + AvroRecordHelper.getFieldNameUpdater(streamName!!, namespace, streamSchema) val objectSummaries = getAllSyncedObjects(streamName, namespace) val jsonRecords: MutableList = LinkedList() @@ -41,7 +43,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() : val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key) val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key)) val path = Path(uri) - val hadoopConfig = S3ParquetWriter.getHadoopConfig(config) + val hadoopConfig = S3ParquetWriter.getHadoopConfig(s3DestinationConfig) ParquetReader.builder(AvroReadSupport(), path) .withConf(hadoopConfig) @@ -62,7 +64,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() : return jsonRecords } - override fun getTestDataComparator() = S3AvroParquetTestDataComparator() + override fun getTestDataComparator(): TestDataComparator = S3AvroParquetTestDataComparator() @Throws(Exception::class) override fun retrieveDataTypesFromPersistedFiles( @@ -76,7 +78,7 @@ abstract class S3BaseParquetDestinationAcceptanceTest protected constructor() : val `object` = s3Client!!.getObject(objectSummary!!.bucketName, objectSummary.key) val uri = URI(String.format("s3a://%s/%s", `object`.bucketName, `object`.key)) val path = Path(uri) - val hadoopConfig = S3ParquetWriter.getHadoopConfig(config) + val hadoopConfig = S3ParquetWriter.getHadoopConfig(s3DestinationConfig) ParquetReader.builder(AvroReadSupport(), path) .withConf(hadoopConfig) diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt index 
6ab0884c23d1..b3e2b4cd51ad 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/testFixtures/kotlin/io/airbyte/cdk/integrations/destination/s3/S3DestinationAcceptanceTest.kt @@ -13,6 +13,7 @@ import io.airbyte.cdk.integrations.destination.NamingConventionTransformer import io.airbyte.cdk.integrations.destination.s3.util.S3NameTransformer import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator import io.airbyte.commons.io.IOs import io.airbyte.commons.jackson.MoreMappers import io.airbyte.commons.json.Jsons @@ -39,9 +40,9 @@ abstract class S3DestinationAcceptanceTest protected constructor(protected val outputFormat: S3Format) : DestinationAcceptanceTest() { protected val secretFilePath: String = "secrets/config.json" protected var configJson: JsonNode? = null - protected lateinit var config: S3DestinationConfig + protected lateinit var s3DestinationConfig: S3DestinationConfig protected var s3Client: AmazonS3? = null - protected lateinit var nameTransformer: NamingConventionTransformer + protected lateinit var s3nameTransformer: NamingConventionTransformer protected var s3StorageOperations: S3StorageOperations? = null protected val baseConfigJson: JsonNode @@ -54,6 +55,8 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta return null } + override fun getConfig(): JsonNode = configJson!! + override fun getFailCheckConfig(): JsonNode { val baseJson = baseConfigJson val failCheckJson = Jsons.clone(baseJson) @@ -68,20 +71,20 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta streamName: String?, namespace: String? ): List { - val namespaceStr = nameTransformer!!.getNamespace(namespace!!) - val streamNameStr = nameTransformer!!.getIdentifier(streamName!!) + val namespaceStr = s3nameTransformer!!.getNamespace(namespace!!) + val streamNameStr = s3nameTransformer!!.getIdentifier(streamName!!) val outputPrefix = s3StorageOperations!!.getBucketObjectPath( namespaceStr, streamNameStr, DateTime.now(DateTimeZone.UTC), - config!!.pathFormat + s3DestinationConfig!!.pathFormat ) // the child folder contains a non-deterministic epoch timestamp, so use the parent folder val parentFolder = outputPrefix.substring(0, outputPrefix.lastIndexOf("/") + 1) val objectSummaries = s3Client!! - .listObjects(config!!.bucketName, parentFolder) + .listObjects(s3DestinationConfig!!.bucketName, parentFolder) .objectSummaries .stream() .filter { o: S3ObjectSummary -> o.key.contains("$streamNameStr/") } @@ -105,7 +108,7 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta * * Construct the S3 destination config. * * Construct the S3 client. */ - override fun setup(testEnv: TestDestinationEnv?, TEST_SCHEMAS: HashSet?) 
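The renames in the `S3DestinationAcceptanceTest` hunks around this point (`config` to `s3DestinationConfig`, `nameTransformer` to `s3nameTransformer`) keep the subclass's `lateinit` state from reading like the base class's config accessors, now that `getConfig()` is overridden here too. An illustrative sketch with invented names and a `String` standing in for the parsed config type:

```kotlin
open class AcceptanceTestBase {
    protected var configJson: String? = null
    open fun getConfig(): String = configJson ?: error("setup() not called")
}

class S3AcceptanceTest : AcceptanceTestBase() {
    // lateinit: assigned once in setup(), non-null everywhere after, so the
    // `config!!` assertions scattered through the old code disappear.
    private lateinit var s3DestinationConfig: String

    fun setup(rawJson: String) {
        configJson = rawJson
        s3DestinationConfig = "parsed($rawJson)"
    }

    fun bucketPath(): String = s3DestinationConfig // no `!!` needed
}
```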
{ + override fun setup(testEnv: TestDestinationEnv, TEST_SCHEMAS: HashSet) { val baseConfigJson = baseConfigJson // Set a random s3 bucket path for each integration test val configJson = Jsons.clone(baseConfigJson) @@ -119,19 +122,27 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta .put("s3_bucket_path", testBucketPath) .set("format", formatConfig) this.configJson = configJson - this.config = S3DestinationConfig.getS3DestinationConfig(configJson, storageProvider()) - LOGGER.info("Test full path: {}/{}", config.bucketName, config.bucketPath) + this.s3DestinationConfig = + S3DestinationConfig.getS3DestinationConfig(configJson, storageProvider()) + LOGGER.info( + "Test full path: {}/{}", + s3DestinationConfig.bucketName, + s3DestinationConfig.bucketPath + ) - this.s3Client = config.getS3Client() - this.nameTransformer = S3NameTransformer() - this.s3StorageOperations = S3StorageOperations(nameTransformer, s3Client!!, config) + this.s3Client = s3DestinationConfig.getS3Client() + this.s3nameTransformer = S3NameTransformer() + this.s3StorageOperations = + S3StorageOperations(s3nameTransformer, s3Client!!, s3DestinationConfig) } /** Remove all the S3 output from the tests. */ - override fun tearDown(testEnv: TestDestinationEnv?) { + override fun tearDown(testEnv: TestDestinationEnv) { val keysToDelete: MutableList = LinkedList() val objects = - s3Client!!.listObjects(config!!.bucketName, config!!.bucketPath).objectSummaries + s3Client!! + .listObjects(s3DestinationConfig!!.bucketName, s3DestinationConfig!!.bucketPath) + .objectSummaries for (`object` in objects) { keysToDelete.add(DeleteObjectsRequest.KeyVersion(`object`.key)) } @@ -139,18 +150,18 @@ protected constructor(protected val outputFormat: S3Format) : DestinationAccepta if (keysToDelete.size > 0) { LOGGER.info( "Tearing down test bucket path: {}/{}", - config!!.bucketName, - config!!.bucketPath + s3DestinationConfig!!.bucketName, + s3DestinationConfig!!.bucketPath ) val result = s3Client!!.deleteObjects( - DeleteObjectsRequest(config!!.bucketName).withKeys(keysToDelete) + DeleteObjectsRequest(s3DestinationConfig!!.bucketName).withKeys(keysToDelete) ) LOGGER.info("Deleted {} file(s).", result.deletedObjects.size) } } - override fun getTestDataComparator() = AdvancedTestDataComparator() + override fun getTestDataComparator(): TestDataComparator = AdvancedTestDataComparator() override fun supportBasicDataTypeTest(): Boolean { return true diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt index 8b05e88e7a15..ae080b8162ed 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseDestinationV1V2Migrator.kt @@ -196,7 +196,7 @@ abstract class BaseDestinationV1V2Migrator : Destination */ abstract fun schemaMatchesExpectation( existingTable: DialectTableDefinition, - columns: Collection? 
+ columns: Collection ): Boolean /** @@ -219,7 +219,7 @@ abstract class BaseDestinationV1V2Migrator : Destination * @param streamConfig the stream in question * @return the valid v1 name and namespace for the same stream */ - abstract fun convertToV1RawName(streamConfig: StreamConfig?): NamespacedTableName + abstract fun convertToV1RawName(streamConfig: StreamConfig): NamespacedTableName companion object { protected val LOGGER: Logger = diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt index 7820645b0596..c52bb15e4e7c 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParser.kt @@ -122,7 +122,7 @@ constructor( val primaryKey = stream.primaryKey .stream() - .map { key: List -> sqlGenerator.buildColumnId(key[0]) } + .map { key: List -> sqlGenerator.buildColumnId(key[0]) } .toList() require(stream.cursorField.size <= 1) { "Only top-level cursors are supported" } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt index 5373fe894cba..d1e33220c4b0 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.kt @@ -377,7 +377,7 @@ class DefaultTyperDeduper( } @Throws(Exception::class) - override fun typeAndDedupe(streamSyncSummaries: Map) { + override fun typeAndDedupe(streamSyncSummaries: Map) { LOGGER.info("Typing and deduping all tables") val typeAndDedupeTasks: MutableSet>> = HashSet() parsedCatalog.streams diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt index b86be15a360b..69802466706c 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationHandler.kt @@ -4,7 +4,7 @@ package io.airbyte.integrations.base.destination.typing_deduping interface DestinationHandler { - @Throws(Exception::class) fun execute(sql: Sql?) + @Throws(Exception::class) fun execute(sql: Sql) /** * Fetch the current state of the destination for the given streams. This method MUST create the @@ -14,9 +14,9 @@ interface DestinationHandler { */ @Throws(Exception::class) fun gatherInitialState( - streamConfigs: List? + streamConfigs: List ): List> @Throws(Exception::class) - fun commitDestinationStates(destinationStates: Map?) 
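The `DestinationHandler` signatures in this hunk drop their `?` marks, which pushes null handling to the caller and lets implementations drop defensive assertions. A minimal sketch of the effect; `Sql` below is a reduced stand-in for the CDK class.

```kotlin
// Reduced stand-in for the CDK's Sql type.
class Sql(val statements: List<String>)

interface DestinationHandler {
    @Throws(Exception::class) fun execute(sql: Sql) // non-null parameter
}

class LoggingDestinationHandler : DestinationHandler {
    override fun execute(sql: Sql) {
        // The signature guarantees sql is non-null, so no `sql!!` is needed.
        sql.statements.forEach(::println)
    }
}

fun main() {
    LoggingDestinationHandler().execute(Sql(listOf("CREATE SCHEMA airbyte_internal")))
}
```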
+ fun commitDestinationStates(destinationStates: Map) } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt index 4bc3b4e24f0b..60cfbca93809 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoOpTyperDeduperWithV1V2Migrations.kt @@ -94,7 +94,7 @@ class NoOpTyperDeduperWithV1V2Migrations) { + override fun typeAndDedupe(streamSyncSummaries: Map) { log.info("Skipping TypeAndDedupe final") } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt index 8c83383ae9e9..26df693cf76c 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/NoopTyperDeduper.kt @@ -48,7 +48,7 @@ class NoopTyperDeduper : TyperDeduper { override fun commitFinalTables() {} - override fun typeAndDedupe(streamSyncSummaries: Map) {} + override fun typeAndDedupe(streamSyncSummaries: Map) {} override fun cleanup() {} } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt index 124fa99c91a0..cbf522367509 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/SqlGenerator.kt @@ -7,13 +7,13 @@ import java.time.Instant import java.util.* interface SqlGenerator { - fun buildStreamId(namespace: String?, name: String?, rawNamespaceOverride: String?): StreamId + fun buildStreamId(namespace: String, name: String, rawNamespaceOverride: String): StreamId - fun buildColumnId(name: String?): ColumnId { + fun buildColumnId(name: String): ColumnId { return buildColumnId(name, "") } - fun buildColumnId(name: String?, suffix: String?): ColumnId + fun buildColumnId(name: String, suffix: String?): ColumnId /** * Generate a SQL statement to create a fresh table to match the given stream. @@ -26,7 +26,7 @@ interface SqlGenerator { * the table already exists. If you're passing a non-empty prefix, you likely want to set this * to true. */ - fun createTable(stream: StreamConfig?, suffix: String?, force: Boolean): Sql + fun createTable(stream: StreamConfig, suffix: String, force: Boolean): Sql /** * Used to create either the airbyte_internal or final schemas if they don't exist @@ -64,11 +64,11 @@ interface SqlGenerator { * which handles casting exceptions. 
*/ fun updateTable( - stream: StreamConfig?, + stream: StreamConfig, finalSuffix: String?, minRawTimestamp: Optional, useExpensiveSaferCasting: Boolean - ): Sql? + ): Sql /** * Drop the previous final table, and rename the new final table to match the old final table. @@ -76,7 +76,7 @@ interface SqlGenerator { * This method may assume that the stream is an OVERWRITE stream, and that the final suffix is * non-empty. Callers are responsible for verifying those are true. */ - fun overwriteFinalTable(stream: StreamId?, finalSuffix: String?): Sql? + fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql /** * Creates a sql query which will create a v2 raw table from the v1 raw table, then performs a @@ -87,20 +87,20 @@ interface SqlGenerator { * @param tableName name of the v2 raw table * @return a string containing the necessary sql to migrate */ - fun migrateFromV1toV2(streamId: StreamId?, namespace: String?, tableName: String?): Sql? + fun migrateFromV1toV2(streamId: StreamId, namespace: String?, tableName: String?): Sql /** * Typically we need to create a soft reset temporary table and clear loaded at values * * @return */ - fun prepareTablesForSoftReset(stream: StreamConfig): Sql? { + fun prepareTablesForSoftReset(stream: StreamConfig): Sql { val createTempTable = createTable(stream, TypeAndDedupeTransaction.SOFT_RESET_SUFFIX, true) val clearLoadedAt = clearLoadedAt(stream.id) return Sql.Companion.concat(createTempTable, clearLoadedAt) } - fun clearLoadedAt(streamId: StreamId?): Sql + fun clearLoadedAt(streamId: StreamId): Sql /** * Implementation specific if there is no option to retry again with safe casted SQL or the diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt index 4ad8c84932ab..60a8fb24fe75 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/TyperDeduper.kt @@ -99,7 +99,7 @@ interface TyperDeduper { * [StreamSyncSummary.DEFAULT]. 
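`prepareTablesForSoftReset` above is an interface method with a default body that composes two other operations and now promises a non-null `Sql`. A self-contained sketch of the pattern; the `Sql` model and names below are simplified stand-ins.

```kotlin
// Simplified Sql model; the real class carries transaction grouping.
class Sql(val transactions: List<String>) {
    companion object {
        fun concat(vararg sqls: Sql) = Sql(sqls.flatMap { it.transactions })
    }
}

interface SqlGenerator {
    fun createTable(suffix: String, force: Boolean): Sql
    fun clearLoadedAt(streamId: String): Sql

    // Default body composing the two abstract operations, with a non-null
    // return: implementers get the soft-reset preparation for free.
    fun prepareTablesForSoftReset(streamId: String): Sql =
        Sql.concat(createTable("_ab_soft_reset", force = true), clearLoadedAt(streamId))
}
```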
*/ @Throws(Exception::class) - fun typeAndDedupe(streamSyncSummaries: Map) + fun typeAndDedupe(streamSyncSummaries: Map) @Throws(Exception::class) fun commitFinalTables() diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt index 61d53864d036..4229facf0b95 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/CatalogParserTest.kt @@ -12,9 +12,9 @@ import java.util.List import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test -import org.mockito.ArgumentMatchers import org.mockito.Mockito import org.mockito.invocation.InvocationOnMock +import org.mockito.kotlin.any internal class CatalogParserTest { private lateinit var sqlGenerator: SqlGenerator @@ -24,31 +24,18 @@ internal class CatalogParserTest { fun setup() { sqlGenerator = Mockito.mock(SqlGenerator::class.java) // noop quoting logic - Mockito.`when`(sqlGenerator.buildColumnId(ArgumentMatchers.any())).thenAnswer { - invocation: InvocationOnMock -> + Mockito.`when`(sqlGenerator.buildColumnId(any())).thenAnswer { invocation: InvocationOnMock + -> val fieldName = invocation.getArgument(0) ColumnId(fieldName, fieldName, fieldName) } - Mockito.`when`( - sqlGenerator.buildStreamId( - ArgumentMatchers.any(), - ArgumentMatchers.any(), - ArgumentMatchers.any() - ) - ) - .thenAnswer { invocation: InvocationOnMock -> - val namespace = invocation.getArgument(0) - val name = invocation.getArgument(1) - val rawNamespace = invocation.getArgument(1) - StreamId( - namespace, - name, - rawNamespace, - namespace + "_abab_" + name, - namespace, - name - ) - } + Mockito.`when`(sqlGenerator.buildStreamId(any(), any(), any())).thenAnswer { + invocation: InvocationOnMock -> + val namespace = invocation.getArgument(0) + val name = invocation.getArgument(1) + val rawNamespace = invocation.getArgument(1) + StreamId(namespace, name, rawNamespace, namespace + "_abab_" + name, namespace, name) + } parser = CatalogParser(sqlGenerator) } @@ -59,29 +46,23 @@ internal class CatalogParserTest { */ @Test fun finalNameCollision() { - Mockito.`when`( - sqlGenerator!!.buildStreamId( - ArgumentMatchers.any(), - ArgumentMatchers.any(), - ArgumentMatchers.any() - ) - ) - .thenAnswer { invocation: InvocationOnMock -> - val originalNamespace = invocation.getArgument(0) - val originalName = (invocation.getArgument(1)) - val originalRawNamespace = (invocation.getArgument(1)) + Mockito.`when`(sqlGenerator!!.buildStreamId(any(), any(), any())).thenAnswer { + invocation: InvocationOnMock -> + val originalNamespace = invocation.getArgument(0) + val originalName = (invocation.getArgument(1)) + val originalRawNamespace = (invocation.getArgument(1)) - // emulate quoting logic that causes a name collision - val quotedName = originalName.replace("bar".toRegex(), "") - StreamId( - originalNamespace, - quotedName, - originalRawNamespace, - originalNamespace + "_abab_" + quotedName, - originalNamespace, - originalName - ) - } + // emulate quoting logic that causes a name collision + val quotedName = originalName.replace("bar".toRegex(), "") + StreamId( + originalNamespace, + 
quotedName, + originalRawNamespace, + originalNamespace + "_abab_" + quotedName, + originalNamespace, + originalName + ) + } val catalog = ConfiguredAirbyteCatalog() .withStreams(List.of(stream("a", "foobarfoo"), stream("a", "foofoo"))) @@ -100,13 +81,13 @@ internal class CatalogParserTest { */ @Test fun columnNameCollision() { - Mockito.`when`(sqlGenerator!!.buildColumnId(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenAnswer { invocation: InvocationOnMock -> - val originalName = invocation.getArgument(0) - // emulate quoting logic that causes a name collision - val quotedName = originalName.replace("bar".toRegex(), "") - ColumnId(quotedName, originalName, quotedName) - } + Mockito.`when`(sqlGenerator!!.buildColumnId(any(), any())).thenAnswer { + invocation: InvocationOnMock -> + val originalName = invocation.getArgument(0) + // emulate quoting logic that causes a name collision + val quotedName = originalName.replace("bar".toRegex(), "") + ColumnId(quotedName, originalName, quotedName) + } val schema = Jsons.deserialize( """ diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt index fbd5a43168ea..9d56a7e5d23b 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduperTest.kt @@ -26,6 +26,7 @@ import org.junit.jupiter.api.Test import org.mockito.ArgumentMatchers import org.mockito.Mockito import org.mockito.Mockito.mock +import org.mockito.kotlin.any class DefaultTyperDeduperTest { private var parsedCatalog: ParsedCatalog? 
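The test hunks above and below swap `ArgumentMatchers.any()` for `org.mockito.kotlin.any()`. The distinction matters in Kotlin: the Java matcher returns null, which can trip the compiler-generated null checks on non-null parameters, while the mockito-kotlin variant supplies a non-null placeholder. A small sketch; the `Handler` interface is invented.

```kotlin
import org.mockito.Mockito
import org.mockito.kotlin.any
import org.mockito.kotlin.mock

// Invented interface standing in for DestinationHandler and friends.
interface Handler {
    fun execute(sql: String) // non-null Kotlin parameter
}

fun main() {
    val handler = mock<Handler>()
    // ArgumentMatchers.any() yields null and can fail Kotlin's generated
    // parameter null checks; org.mockito.kotlin.any() is null-safe here.
    Mockito.doThrow(RuntimeException("boom")).`when`(handler).execute(any())
}
```

This is why the migration happens in lockstep with the non-null signature changes: once a stubbed parameter is non-null, the Java matchers stop being safe.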
= null @@ -51,7 +52,7 @@ class DefaultTyperDeduperTest { private lateinit var destinationHandler: DestinationHandler private lateinit var initialStates: List> - private lateinit var updatedStates: MutableMap + private lateinit var updatedStates: MutableMap private lateinit var migrator: DestinationV1V2Migrator private lateinit var typerDeduper: TyperDeduper @@ -206,7 +207,7 @@ class DefaultTyperDeduperTest { Mockito.clearInvocations(destinationHandler) typerDeduper!!.commitFinalTables() - Mockito.verify(destinationHandler, Mockito.never()).execute(ArgumentMatchers.any()) + Mockito.verify(destinationHandler, Mockito.never()).execute(any()) } /** @@ -314,7 +315,7 @@ class DefaultTyperDeduperTest { Mockito.clearInvocations(destinationHandler) typerDeduper!!.prepareFinalTables() - Mockito.verify(destinationHandler, Mockito.never()).execute(ArgumentMatchers.any()) + Mockito.verify(destinationHandler, Mockito.never()).execute(any()) } /** @@ -466,9 +467,7 @@ class DefaultTyperDeduperTest { @Test @Throws(Exception::class) fun failedSetup() { - Mockito.doThrow(RuntimeException("foo")) - .`when`(destinationHandler) - .execute(ArgumentMatchers.any()) + Mockito.doThrow(RuntimeException("foo")).`when`(destinationHandler).execute(any()) Assertions.assertThrows(Exception::class.java) { typerDeduper!!.prepareFinalTables() } Mockito.clearInvocations(destinationHandler) @@ -636,7 +635,7 @@ class DefaultTyperDeduperTest { MockState(true, true, true) ) ) - Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any()) + Mockito.verify(destinationHandler).gatherInitialState(any()) Mockito.verify(destinationHandler) .execute( separately( @@ -756,7 +755,7 @@ class DefaultTyperDeduperTest { MockState(true, true, true) ) ) - Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any()) + Mockito.verify(destinationHandler).gatherInitialState(any()) Mockito.verify(destinationHandler) .execute( separately( @@ -865,7 +864,7 @@ class DefaultTyperDeduperTest { MockState(true, false, false) ) ) - Mockito.verify(destinationHandler).gatherInitialState(ArgumentMatchers.any()) + Mockito.verify(destinationHandler).gatherInitialState(any()) Mockito.verify(destinationHandler) .execute( separately( diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt index 19a7dc6066d8..c427253bdf56 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationV1V2MigratorTest.kt @@ -15,9 +15,9 @@ import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.ArgumentsProvider import org.junit.jupiter.params.provider.ArgumentsSource -import org.mockito.ArgumentMatchers import org.mockito.Mockito import org.mockito.Mockito.mock +import org.mockito.kotlin.any import org.mockito.kotlin.spy class DestinationV1V2MigratorTest { @@ -113,7 +113,7 @@ class DestinationV1V2MigratorTest { migrator.migrate(sqlGenerator, handler, stream) Mockito.verify(handler).execute(sql) // Exception thrown when executing sql, TableNotMigratedException thrown - 
Mockito.doThrow(Exception::class.java).`when`(handler).execute(ArgumentMatchers.any()) + Mockito.doThrow(Exception::class.java).`when`(handler).execute(any()) val exception = Assertions.assertThrows(TableNotMigratedException::class.java) { migrator.migrate(sqlGenerator, handler, stream) @@ -136,7 +136,7 @@ class DestinationV1V2MigratorTest { v1RawTableSchemaMatches: Boolean ): BaseDestinationV1V2Migrator<*> { val migrator: BaseDestinationV1V2Migrator = spy() - Mockito.`when`(migrator.doesAirbyteInternalNamespaceExist(ArgumentMatchers.any())) + Mockito.`when`(migrator.doesAirbyteInternalNamespaceExist(any())) .thenReturn(v2NamespaceExists) val existingTable = if (v2TableExists) Optional.of("v2_raw") else Optional.empty() @@ -156,7 +156,7 @@ class DestinationV1V2MigratorTest { ) .thenReturn(v2RawSchemaMatches) - Mockito.`when`(migrator.convertToV1RawName(ArgumentMatchers.any())) + Mockito.`when`(migrator.convertToV1RawName(any())) .thenReturn(NamespacedTableName("v1_raw_namespace", "v1_raw_table")) val existingV1RawTable = if (v1RawTableExists) Optional.of("v1_raw") else Optional.empty() @@ -174,7 +174,13 @@ class DestinationV1V2MigratorTest { @Throws(Exception::class) fun noIssuesMigrator(): BaseDestinationV1V2Migrator<*> { - return makeMockMigrator(true, false, true, true, true) + return makeMockMigrator( + v2NamespaceExists = true, + v2TableExists = false, + v2RawSchemaMatches = true, + v1RawTableExists = true, + v1RawTableSchemaMatches = true + ) } } } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/MockSqlGenerator.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/MockSqlGenerator.kt index fffc08cc4e28..74265393ccff 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/MockSqlGenerator.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/typing_deduping/MockSqlGenerator.kt @@ -11,14 +11,14 @@ import java.util.function.Function /** Basic SqlGenerator mock. See [DefaultTyperDeduperTest] for example usage. */ internal class MockSqlGenerator : SqlGenerator { override fun buildStreamId( - namespace: String?, - name: String?, - rawNamespaceOverride: String? + namespace: String, + name: String, + rawNamespaceOverride: String ): StreamId { throw RuntimeException() } - override fun buildColumnId(name: String?, suffix: String?): ColumnId { + override fun buildColumnId(name: String, suffix: String?): ColumnId { throw RuntimeException() } @@ -26,16 +26,16 @@ internal class MockSqlGenerator : SqlGenerator { return of("CREATE SCHEMA $schema") } - override fun createTable(stream: StreamConfig?, suffix: String?, force: Boolean): Sql { + override fun createTable(stream: StreamConfig, suffix: String, force: Boolean): Sql { return of("CREATE TABLE " + stream!!.id.finalTableId("", suffix!!)) } override fun updateTable( - stream: StreamConfig?, + stream: StreamConfig, finalSuffix: String?, minRawTimestamp: Optional, useExpensiveSaferCasting: Boolean - ): Sql? { + ): Sql { val timestampFilter = minRawTimestamp .map(Function { timestamp: Instant? -> " WHERE extracted_at > $timestamp" }) @@ -48,7 +48,7 @@ internal class MockSqlGenerator : SqlGenerator { ) } - override fun overwriteFinalTable(stream: StreamId?, finalSuffix: String?): Sql? 
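The `noIssuesMigrator()` rewrite above converts five positional booleans to named arguments, the idiomatic fix for call sites where same-typed flags are easy to transpose. A compact sketch with an invented function:

```kotlin
// Invented function; mirrors the shape of the call being cleaned up.
fun makeMigrator(
    v2NamespaceExists: Boolean,
    v2TableExists: Boolean,
    v2RawSchemaMatches: Boolean,
    v1RawTableExists: Boolean,
    v1RawTableSchemaMatches: Boolean
): String =
    "ns=$v2NamespaceExists table=$v2TableExists schema=$v2RawSchemaMatches " +
        "v1table=$v1RawTableExists v1schema=$v1RawTableSchemaMatches"

fun main() {
    // Positional, makeMigrator(true, false, true, true, true), reads as noise;
    // named arguments make each flag self-documenting at the call site.
    println(
        makeMigrator(
            v2NamespaceExists = true,
            v2TableExists = false,
            v2RawSchemaMatches = true,
            v1RawTableExists = true,
            v1RawTableSchemaMatches = true
        )
    )
}
```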
{ + override fun overwriteFinalTable(stream: StreamId, finalSuffix: String?): Sql { return of( "OVERWRITE TABLE " + stream!!.finalTableId("") + @@ -58,10 +58,10 @@ internal class MockSqlGenerator : SqlGenerator { } override fun migrateFromV1toV2( - streamId: StreamId?, + streamId: StreamId, namespace: String?, tableName: String? - ): Sql? { + ): Sql { return of( "MIGRATE TABLE " + java.lang.String.join(".", namespace, tableName) + @@ -70,7 +70,7 @@ internal class MockSqlGenerator : SqlGenerator { ) } - override fun prepareTablesForSoftReset(stream: StreamConfig): Sql? { + override fun prepareTablesForSoftReset(stream: StreamConfig): Sql { return of( "PREPARE " + java.lang.String.join(".", stream.id.originalNamespace, stream.id.originalName) + @@ -78,7 +78,7 @@ internal class MockSqlGenerator : SqlGenerator { ) } - override fun clearLoadedAt(streamId: StreamId?): Sql { + override fun clearLoadedAt(streamId: StreamId): Sql { throw RuntimeException() } } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt index 264d3456db1f..3e09bef3cf0e 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt @@ -259,7 +259,8 @@ abstract class BaseSqlGeneratorIntegrationTest { - val initialState = destinationHandler!!.gatherInitialState(java.util.List.of(streamConfig)) + val initialState = + destinationHandler!!.gatherInitialState(java.util.List.of(streamConfig!!)) Assertions.assertEquals( 1, initialState!!.size, @@ -276,7 +277,7 @@ abstract class BaseSqlGeneratorIntegrationTest = + val FINAL_TABLE_COLUMN_NAMES: List = listOf( "_airbyte_raw_id", "_airbyte_extracted_at",